aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c2
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/affs/file.c21
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c212
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c114
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/tests/inode-tests.c197
-rw-r--r--fs/btrfs/transaction.c39
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/connect.c19
-rw-r--r--fs/cifs/file.c1
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/smb2misc.c2
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/ecryptfs/file.c7
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/indirect.c2
-rw-r--r--fs/ext4/inode.c1
-rw-r--r--fs/ext4/page-io.c1
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/fat/inode.c1
-rw-r--r--fs/fs-writeback.c93
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dev.c20
-rw-r--r--fs/fuse/file.c55
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c102
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/quota.c62
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/brec.c20
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/kernfs/file.c1
-rw-r--r--fs/locks.c5
-rw-r--r--fs/namei.c168
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/blocklayoutxdr.c6
-rw-r--r--fs/nfsd/nfs4layouts.c12
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfs4state.c4
-rw-r--r--fs/nfsd/nfs4xdr.c20
-rw-r--r--fs/nfsd/nfscache.c6
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c784
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ocfs2/alloc.c48
-rw-r--r--fs/ocfs2/aops.c156
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c42
-rw-r--r--fs/ocfs2/cluster/masklog.h5
-rw-r--r--fs/ocfs2/dir.c15
-rw-r--r--fs/ocfs2/dlmglue.c7
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/file.c25
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c8
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c46
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/open.c5
-rw-r--r--fs/overlayfs/super.c33
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/read_write.c117
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/splice.c28
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/group.c11
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/udf/file.c4
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/xfs/xfs_aops.c1
-rw-r--r--fs/xfs/xfs_file.c1
117 files changed, 2336 insertions, 1194 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..ff1a5bac4200 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/aio.h> 36#include <linux/uio.h>
37#include <net/9p/9p.h> 37#include <net/9p/9p.h>
38#include <net/9p/client.h> 38#include <net/9p/client.h>
39 39
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
27 bool 27 bool
28 depends on COMPAT && BINFMT_ELF 28 depends on COMPAT && BINFMT_ELF
29 29
30config ARCH_BINFMT_ELF_RANDOMIZE_PIE
31 bool
32
33config ARCH_BINFMT_ELF_STATE 30config ARCH_BINFMT_ELF_STATE
34 bool 31 bool
35 32
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
118obj-$(CONFIG_HPPFS) += hppfs/ 118obj-$(CONFIG_HPPFS) += hppfs/
119obj-$(CONFIG_CACHEFILES) += cachefiles/ 119obj-$(CONFIG_CACHEFILES) += cachefiles/
120obj-$(CONFIG_DEBUG_FS) += debugfs/ 120obj-$(CONFIG_DEBUG_FS) += debugfs/
121obj-$(CONFIG_TRACING) += tracefs/
121obj-$(CONFIG_OCFS2_FS) += ocfs2/ 122obj-$(CONFIG_OCFS2_FS) += ocfs2/
122obj-$(CONFIG_BTRFS_FS) += btrfs/ 123obj-$(CONFIG_BTRFS_FS) += btrfs/
123obj-$(CONFIG_GFS2_FS) += gfs2/ 124obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..3aa7eb66547e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
12 * affs regular file handling primitives 12 * affs regular file handling primitives
13 */ 13 */
14 14
15#include <linux/aio.h> 15#include <linux/uio.h>
16#include "affs.h" 16#include "affs.h"
17 17
18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); 18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
699 boff = tmp % bsize; 699 boff = tmp % bsize;
700 if (boff) { 700 if (boff) {
701 bh = affs_bread_ino(inode, bidx, 0); 701 bh = affs_bread_ino(inode, bidx, 0);
702 if (IS_ERR(bh)) 702 if (IS_ERR(bh)) {
703 return PTR_ERR(bh); 703 written = PTR_ERR(bh);
704 goto err_first_bh;
705 }
704 tmp = min(bsize - boff, to - from); 706 tmp = min(bsize - boff, to - from);
705 BUG_ON(boff + tmp > bsize || tmp > bsize); 707 BUG_ON(boff + tmp > bsize || tmp > bsize);
706 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 708 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
712 bidx++; 714 bidx++;
713 } else if (bidx) { 715 } else if (bidx) {
714 bh = affs_bread_ino(inode, bidx - 1, 0); 716 bh = affs_bread_ino(inode, bidx - 1, 0);
715 if (IS_ERR(bh)) 717 if (IS_ERR(bh)) {
716 return PTR_ERR(bh); 718 written = PTR_ERR(bh);
719 goto err_first_bh;
720 }
717 } 721 }
718 while (from + bsize <= to) { 722 while (from + bsize <= to) {
719 prev_bh = bh; 723 prev_bh = bh;
720 bh = affs_getemptyblk_ino(inode, bidx); 724 bh = affs_getemptyblk_ino(inode, bidx);
721 if (IS_ERR(bh)) 725 if (IS_ERR(bh))
722 goto out; 726 goto err_bh;
723 memcpy(AFFS_DATA(bh), data + from, bsize); 727 memcpy(AFFS_DATA(bh), data + from, bsize);
724 if (buffer_new(bh)) { 728 if (buffer_new(bh)) {
725 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 729 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
751 prev_bh = bh; 755 prev_bh = bh;
752 bh = affs_bread_ino(inode, bidx, 1); 756 bh = affs_bread_ino(inode, bidx, 1);
753 if (IS_ERR(bh)) 757 if (IS_ERR(bh))
754 goto out; 758 goto err_bh;
755 tmp = min(bsize, to - from); 759 tmp = min(bsize, to - from);
756 BUG_ON(tmp > bsize); 760 BUG_ON(tmp > bsize);
757 memcpy(AFFS_DATA(bh), data + from, tmp); 761 memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
790 if (tmp > inode->i_size) 794 if (tmp > inode->i_size)
791 inode->i_size = AFFS_I(inode)->mmu_private = tmp; 795 inode->i_size = AFFS_I(inode)->mmu_private = tmp;
792 796
797err_first_bh:
793 unlock_page(page); 798 unlock_page(page);
794 page_cache_release(page); 799 page_cache_release(page);
795 800
796 return written; 801 return written;
797 802
798out: 803err_bh:
799 bh = prev_bh; 804 bh = prev_bh;
800 if (!written) 805 if (!written)
801 written = PTR_ERR(bh); 806 written = PTR_ERR(bh);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/aio.h>
18#include "internal.h" 17#include "internal.h"
19 18
20static int afs_write_back_from_locked_page(struct afs_writeback *wb, 19static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1854c1..1ab60010cf6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@ struct kioctx {
151 unsigned id; 151 unsigned id;
152}; 152};
153 153
154/*
155 * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
156 * cancelled or completed (this makes a certain amount of sense because
157 * successful cancellation - io_cancel() - does deliver the completion to
158 * userspace).
159 *
160 * And since most things don't implement kiocb cancellation and we'd really like
161 * kiocb completion to be lockless when possible, we use ki_cancel to
162 * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
163 * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
164 */
165#define KIOCB_CANCELLED ((void *) (~0ULL))
166
167struct aio_kiocb {
168 struct kiocb common;
169
170 struct kioctx *ki_ctx;
171 kiocb_cancel_fn *ki_cancel;
172
173 struct iocb __user *ki_user_iocb; /* user's aiocb */
174 __u64 ki_user_data; /* user's data for completion */
175
176 struct list_head ki_list; /* the aio core uses this
177 * for cancellation */
178
179 /*
180 * If the aio_resfd field of the userspace iocb is not zero,
181 * this is the underlying eventfd context to deliver events to.
182 */
183 struct eventfd_ctx *ki_eventfd;
184};
185
154/*------ sysctl variables----*/ 186/*------ sysctl variables----*/
155static DEFINE_SPINLOCK(aio_nr_lock); 187static DEFINE_SPINLOCK(aio_nr_lock);
156unsigned long aio_nr; /* current system wide number of aio requests */ 188unsigned long aio_nr; /* current system wide number of aio requests */
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
220 if (IS_ERR(aio_mnt)) 252 if (IS_ERR(aio_mnt))
221 panic("Failed to create aio fs mount."); 253 panic("Failed to create aio fs mount.");
222 254
223 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 255 kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
224 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 256 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
225 257
226 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); 258 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -278,11 +310,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
278 return 0; 310 return 0;
279} 311}
280 312
281static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) 313static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
282{ 314{
283 struct mm_struct *mm = vma->vm_mm; 315 struct mm_struct *mm = vma->vm_mm;
284 struct kioctx_table *table; 316 struct kioctx_table *table;
285 int i; 317 int i, res = -EINVAL;
286 318
287 spin_lock(&mm->ioctx_lock); 319 spin_lock(&mm->ioctx_lock);
288 rcu_read_lock(); 320 rcu_read_lock();
@@ -292,13 +324,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
292 324
293 ctx = table->table[i]; 325 ctx = table->table[i];
294 if (ctx && ctx->aio_ring_file == file) { 326 if (ctx && ctx->aio_ring_file == file) {
295 ctx->user_id = ctx->mmap_base = vma->vm_start; 327 if (!atomic_read(&ctx->dead)) {
328 ctx->user_id = ctx->mmap_base = vma->vm_start;
329 res = 0;
330 }
296 break; 331 break;
297 } 332 }
298 } 333 }
299 334
300 rcu_read_unlock(); 335 rcu_read_unlock();
301 spin_unlock(&mm->ioctx_lock); 336 spin_unlock(&mm->ioctx_lock);
337 return res;
302} 338}
303 339
304static const struct file_operations aio_ring_fops = { 340static const struct file_operations aio_ring_fops = {
@@ -480,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx)
480#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) 516#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
481#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) 517#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
482 518
483void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) 519void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
484{ 520{
521 struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
485 struct kioctx *ctx = req->ki_ctx; 522 struct kioctx *ctx = req->ki_ctx;
486 unsigned long flags; 523 unsigned long flags;
487 524
@@ -496,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
496} 533}
497EXPORT_SYMBOL(kiocb_set_cancel_fn); 534EXPORT_SYMBOL(kiocb_set_cancel_fn);
498 535
499static int kiocb_cancel(struct kiocb *kiocb) 536static int kiocb_cancel(struct aio_kiocb *kiocb)
500{ 537{
501 kiocb_cancel_fn *old, *cancel; 538 kiocb_cancel_fn *old, *cancel;
502 539
@@ -514,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
514 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); 551 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
515 } while (cancel != old); 552 } while (cancel != old);
516 553
517 return cancel(kiocb); 554 return cancel(&kiocb->common);
518} 555}
519 556
520static void free_ioctx(struct work_struct *work) 557static void free_ioctx(struct work_struct *work)
@@ -550,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
550static void free_ioctx_users(struct percpu_ref *ref) 587static void free_ioctx_users(struct percpu_ref *ref)
551{ 588{
552 struct kioctx *ctx = container_of(ref, struct kioctx, users); 589 struct kioctx *ctx = container_of(ref, struct kioctx, users);
553 struct kiocb *req; 590 struct aio_kiocb *req;
554 591
555 spin_lock_irq(&ctx->ctx_lock); 592 spin_lock_irq(&ctx->ctx_lock);
556 593
557 while (!list_empty(&ctx->active_reqs)) { 594 while (!list_empty(&ctx->active_reqs)) {
558 req = list_first_entry(&ctx->active_reqs, 595 req = list_first_entry(&ctx->active_reqs,
559 struct kiocb, ki_list); 596 struct aio_kiocb, ki_list);
560 597
561 list_del_init(&req->ki_list); 598 list_del_init(&req->ki_list);
562 kiocb_cancel(req); 599 kiocb_cancel(req);
@@ -727,6 +764,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
727err_cleanup: 764err_cleanup:
728 aio_nr_sub(ctx->max_reqs); 765 aio_nr_sub(ctx->max_reqs);
729err_ctx: 766err_ctx:
767 atomic_set(&ctx->dead, 1);
768 if (ctx->mmap_size)
769 vm_munmap(ctx->mmap_base, ctx->mmap_size);
730 aio_free_ring(ctx); 770 aio_free_ring(ctx);
731err: 771err:
732 mutex_unlock(&ctx->ring_lock); 772 mutex_unlock(&ctx->ring_lock);
@@ -748,11 +788,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
748{ 788{
749 struct kioctx_table *table; 789 struct kioctx_table *table;
750 790
751 if (atomic_xchg(&ctx->dead, 1)) 791 spin_lock(&mm->ioctx_lock);
792 if (atomic_xchg(&ctx->dead, 1)) {
793 spin_unlock(&mm->ioctx_lock);
752 return -EINVAL; 794 return -EINVAL;
795 }
753 796
754
755 spin_lock(&mm->ioctx_lock);
756 table = rcu_dereference_raw(mm->ioctx_table); 797 table = rcu_dereference_raw(mm->ioctx_table);
757 WARN_ON(ctx != table->table[ctx->id]); 798 WARN_ON(ctx != table->table[ctx->id]);
758 table->table[ctx->id] = NULL; 799 table->table[ctx->id] = NULL;
@@ -778,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
778 return 0; 819 return 0;
779} 820}
780 821
781/* wait_on_sync_kiocb:
782 * Waits on the given sync kiocb to complete.
783 */
784ssize_t wait_on_sync_kiocb(struct kiocb *req)
785{
786 while (!req->ki_ctx) {
787 set_current_state(TASK_UNINTERRUPTIBLE);
788 if (req->ki_ctx)
789 break;
790 io_schedule();
791 }
792 __set_current_state(TASK_RUNNING);
793 return req->ki_user_data;
794}
795EXPORT_SYMBOL(wait_on_sync_kiocb);
796
797/* 822/*
798 * exit_aio: called when the last user of mm goes away. At this point, there is 823 * exit_aio: called when the last user of mm goes away. At this point, there is
799 * no way for any new requests to be submited or any of the io_* syscalls to be 824 * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -948,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
948 * Allocate a slot for an aio request. 973 * Allocate a slot for an aio request.
949 * Returns NULL if no requests are free. 974 * Returns NULL if no requests are free.
950 */ 975 */
951static inline struct kiocb *aio_get_req(struct kioctx *ctx) 976static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
952{ 977{
953 struct kiocb *req; 978 struct aio_kiocb *req;
954 979
955 if (!get_reqs_available(ctx)) { 980 if (!get_reqs_available(ctx)) {
956 user_refill_reqs_available(ctx); 981 user_refill_reqs_available(ctx);
@@ -971,10 +996,10 @@ out_put:
971 return NULL; 996 return NULL;
972} 997}
973 998
974static void kiocb_free(struct kiocb *req) 999static void kiocb_free(struct aio_kiocb *req)
975{ 1000{
976 if (req->ki_filp) 1001 if (req->common.ki_filp)
977 fput(req->ki_filp); 1002 fput(req->common.ki_filp);
978 if (req->ki_eventfd != NULL) 1003 if (req->ki_eventfd != NULL)
979 eventfd_ctx_put(req->ki_eventfd); 1004 eventfd_ctx_put(req->ki_eventfd);
980 kmem_cache_free(kiocb_cachep, req); 1005 kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1035,9 @@ out:
1010/* aio_complete 1035/* aio_complete
1011 * Called when the io request on the given iocb is complete. 1036 * Called when the io request on the given iocb is complete.
1012 */ 1037 */
1013void aio_complete(struct kiocb *iocb, long res, long res2) 1038static void aio_complete(struct kiocb *kiocb, long res, long res2)
1014{ 1039{
1040 struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
1015 struct kioctx *ctx = iocb->ki_ctx; 1041 struct kioctx *ctx = iocb->ki_ctx;
1016 struct aio_ring *ring; 1042 struct aio_ring *ring;
1017 struct io_event *ev_page, *event; 1043 struct io_event *ev_page, *event;
@@ -1025,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1025 * ref, no other paths have a way to get another ref 1051 * ref, no other paths have a way to get another ref
1026 * - the sync task helpfully left a reference to itself in the iocb 1052 * - the sync task helpfully left a reference to itself in the iocb
1027 */ 1053 */
1028 if (is_sync_kiocb(iocb)) { 1054 BUG_ON(is_sync_kiocb(kiocb));
1029 iocb->ki_user_data = res;
1030 smp_wmb();
1031 iocb->ki_ctx = ERR_PTR(-EXDEV);
1032 wake_up_process(iocb->ki_obj.tsk);
1033 return;
1034 }
1035 1055
1036 if (iocb->ki_list.next) { 1056 if (iocb->ki_list.next) {
1037 unsigned long flags; 1057 unsigned long flags;
@@ -1057,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1057 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1077 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1058 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 1078 event = ev_page + pos % AIO_EVENTS_PER_PAGE;
1059 1079
1060 event->obj = (u64)(unsigned long)iocb->ki_obj.user; 1080 event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
1061 event->data = iocb->ki_user_data; 1081 event->data = iocb->ki_user_data;
1062 event->res = res; 1082 event->res = res;
1063 event->res2 = res2; 1083 event->res2 = res2;
@@ -1066,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1066 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1086 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1067 1087
1068 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", 1088 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
1069 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, 1089 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
1070 res, res2); 1090 res, res2);
1071 1091
1072 /* after flagging the request as done, we 1092 /* after flagging the request as done, we
@@ -1113,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1113 1133
1114 percpu_ref_put(&ctx->reqs); 1134 percpu_ref_put(&ctx->reqs);
1115} 1135}
1116EXPORT_SYMBOL(aio_complete);
1117 1136
1118/* aio_read_events_ring 1137/* aio_read_events_ring
1119 * Pull an event off of the ioctx's event ring. Returns the number of 1138 * Pull an event off of the ioctx's event ring. Returns the number of
@@ -1341,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
1341 unsigned long, loff_t); 1360 unsigned long, loff_t);
1342typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); 1361typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
1343 1362
1344static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, 1363static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
1345 int rw, char __user *buf, 1364 struct iovec **iovec,
1346 unsigned long *nr_segs, 1365 bool compat,
1347 struct iovec **iovec, 1366 struct iov_iter *iter)
1348 bool compat)
1349{ 1367{
1350 ssize_t ret;
1351
1352 *nr_segs = kiocb->ki_nbytes;
1353
1354#ifdef CONFIG_COMPAT 1368#ifdef CONFIG_COMPAT
1355 if (compat) 1369 if (compat)
1356 ret = compat_rw_copy_check_uvector(rw, 1370 return compat_import_iovec(rw,
1357 (struct compat_iovec __user *)buf, 1371 (struct compat_iovec __user *)buf,
1358 *nr_segs, UIO_FASTIOV, *iovec, iovec); 1372 len, UIO_FASTIOV, iovec, iter);
1359 else
1360#endif 1373#endif
1361 ret = rw_copy_check_uvector(rw, 1374 return import_iovec(rw, (struct iovec __user *)buf,
1362 (struct iovec __user *)buf, 1375 len, UIO_FASTIOV, iovec, iter);
1363 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1364 if (ret < 0)
1365 return ret;
1366
1367 /* ki_nbytes now reflect bytes instead of segs */
1368 kiocb->ki_nbytes = ret;
1369 return 0;
1370}
1371
1372static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1373 int rw, char __user *buf,
1374 unsigned long *nr_segs,
1375 struct iovec *iovec)
1376{
1377 if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
1378 return -EFAULT;
1379
1380 iovec->iov_base = buf;
1381 iovec->iov_len = kiocb->ki_nbytes;
1382 *nr_segs = 1;
1383 return 0;
1384} 1376}
1385 1377
1386/* 1378/*
@@ -1388,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1388 * Performs the initial checks and io submission. 1380 * Performs the initial checks and io submission.
1389 */ 1381 */
1390static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, 1382static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1391 char __user *buf, bool compat) 1383 char __user *buf, size_t len, bool compat)
1392{ 1384{
1393 struct file *file = req->ki_filp; 1385 struct file *file = req->ki_filp;
1394 ssize_t ret; 1386 ssize_t ret;
1395 unsigned long nr_segs;
1396 int rw; 1387 int rw;
1397 fmode_t mode; 1388 fmode_t mode;
1398 aio_rw_op *rw_op; 1389 aio_rw_op *rw_op;
@@ -1423,21 +1414,22 @@ rw_common:
1423 if (!rw_op && !iter_op) 1414 if (!rw_op && !iter_op)
1424 return -EINVAL; 1415 return -EINVAL;
1425 1416
1426 ret = (opcode == IOCB_CMD_PREADV || 1417 if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
1427 opcode == IOCB_CMD_PWRITEV) 1418 ret = aio_setup_vectored_rw(rw, buf, len,
1428 ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, 1419 &iovec, compat, &iter);
1429 &iovec, compat) 1420 else {
1430 : aio_setup_single_vector(req, rw, buf, &nr_segs, 1421 ret = import_single_range(rw, buf, len, iovec, &iter);
1431 iovec); 1422 iovec = NULL;
1423 }
1432 if (!ret) 1424 if (!ret)
1433 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1425 ret = rw_verify_area(rw, file, &req->ki_pos,
1426 iov_iter_count(&iter));
1434 if (ret < 0) { 1427 if (ret < 0) {
1435 if (iovec != inline_vecs) 1428 kfree(iovec);
1436 kfree(iovec);
1437 return ret; 1429 return ret;
1438 } 1430 }
1439 1431
1440 req->ki_nbytes = ret; 1432 len = ret;
1441 1433
1442 /* XXX: move/kill - rw_verify_area()? */ 1434 /* XXX: move/kill - rw_verify_area()? */
1443 /* This matches the pread()/pwrite() logic */ 1435 /* This matches the pread()/pwrite() logic */
@@ -1450,14 +1442,14 @@ rw_common:
1450 file_start_write(file); 1442 file_start_write(file);
1451 1443
1452 if (iter_op) { 1444 if (iter_op) {
1453 iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
1454 ret = iter_op(req, &iter); 1445 ret = iter_op(req, &iter);
1455 } else { 1446 } else {
1456 ret = rw_op(req, iovec, nr_segs, req->ki_pos); 1447 ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
1457 } 1448 }
1458 1449
1459 if (rw == WRITE) 1450 if (rw == WRITE)
1460 file_end_write(file); 1451 file_end_write(file);
1452 kfree(iovec);
1461 break; 1453 break;
1462 1454
1463 case IOCB_CMD_FDSYNC: 1455 case IOCB_CMD_FDSYNC:
@@ -1479,9 +1471,6 @@ rw_common:
1479 return -EINVAL; 1471 return -EINVAL;
1480 } 1472 }
1481 1473
1482 if (iovec != inline_vecs)
1483 kfree(iovec);
1484
1485 if (ret != -EIOCBQUEUED) { 1474 if (ret != -EIOCBQUEUED) {
1486 /* 1475 /*
1487 * There's no easy way to restart the syscall since other AIO's 1476 * There's no easy way to restart the syscall since other AIO's
@@ -1500,7 +1489,7 @@ rw_common:
1500static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1489static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1501 struct iocb *iocb, bool compat) 1490 struct iocb *iocb, bool compat)
1502{ 1491{
1503 struct kiocb *req; 1492 struct aio_kiocb *req;
1504 ssize_t ret; 1493 ssize_t ret;
1505 1494
1506 /* enforce forwards compatibility on users */ 1495 /* enforce forwards compatibility on users */
@@ -1523,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1523 if (unlikely(!req)) 1512 if (unlikely(!req))
1524 return -EAGAIN; 1513 return -EAGAIN;
1525 1514
1526 req->ki_filp = fget(iocb->aio_fildes); 1515 req->common.ki_filp = fget(iocb->aio_fildes);
1527 if (unlikely(!req->ki_filp)) { 1516 if (unlikely(!req->common.ki_filp)) {
1528 ret = -EBADF; 1517 ret = -EBADF;
1529 goto out_put_req; 1518 goto out_put_req;
1530 } 1519 }
1520 req->common.ki_pos = iocb->aio_offset;
1521 req->common.ki_complete = aio_complete;
1522 req->common.ki_flags = 0;
1531 1523
1532 if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1524 if (iocb->aio_flags & IOCB_FLAG_RESFD) {
1533 /* 1525 /*
@@ -1542,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1542 req->ki_eventfd = NULL; 1534 req->ki_eventfd = NULL;
1543 goto out_put_req; 1535 goto out_put_req;
1544 } 1536 }
1537
1538 req->common.ki_flags |= IOCB_EVENTFD;
1545 } 1539 }
1546 1540
1547 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); 1541 ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1550,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1550 goto out_put_req; 1544 goto out_put_req;
1551 } 1545 }
1552 1546
1553 req->ki_obj.user = user_iocb; 1547 req->ki_user_iocb = user_iocb;
1554 req->ki_user_data = iocb->aio_data; 1548 req->ki_user_data = iocb->aio_data;
1555 req->ki_pos = iocb->aio_offset;
1556 req->ki_nbytes = iocb->aio_nbytes;
1557 1549
1558 ret = aio_run_iocb(req, iocb->aio_lio_opcode, 1550 ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
1559 (char __user *)(unsigned long)iocb->aio_buf, 1551 (char __user *)(unsigned long)iocb->aio_buf,
1552 iocb->aio_nbytes,
1560 compat); 1553 compat);
1561 if (ret) 1554 if (ret)
1562 goto out_put_req; 1555 goto out_put_req;
@@ -1643,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1643/* lookup_kiocb 1636/* lookup_kiocb
1644 * Finds a given iocb for cancellation. 1637 * Finds a given iocb for cancellation.
1645 */ 1638 */
1646static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, 1639static struct aio_kiocb *
1647 u32 key) 1640lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
1648{ 1641{
1649 struct list_head *pos; 1642 struct aio_kiocb *kiocb;
1650 1643
1651 assert_spin_locked(&ctx->ctx_lock); 1644 assert_spin_locked(&ctx->ctx_lock);
1652 1645
@@ -1654,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
1654 return NULL; 1647 return NULL;
1655 1648
1656 /* TODO: use a hash or array, this sucks. */ 1649 /* TODO: use a hash or array, this sucks. */
1657 list_for_each(pos, &ctx->active_reqs) { 1650 list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
1658 struct kiocb *kiocb = list_kiocb(pos); 1651 if (kiocb->ki_user_iocb == iocb)
1659 if (kiocb->ki_obj.user == iocb)
1660 return kiocb; 1652 return kiocb;
1661 } 1653 }
1662 return NULL; 1654 return NULL;
@@ -1676,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1676 struct io_event __user *, result) 1668 struct io_event __user *, result)
1677{ 1669{
1678 struct kioctx *ctx; 1670 struct kioctx *ctx;
1679 struct kiocb *kiocb; 1671 struct aio_kiocb *kiocb;
1680 u32 key; 1672 u32 key;
1681 int ret; 1673 int ret;
1682 1674
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/vfs.h> 16#include <linux/vfs.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/uio.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "bfs.h" 20#include "bfs.h"
20 21
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/random.h> 32#include <linux/random.h>
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/elf-randomize.h>
34#include <linux/utsname.h> 35#include <linux/utsname.h>
35#include <linux/coredump.h> 36#include <linux/coredump.h>
36#include <linux/sched.h> 37#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 863 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 int elf_prot = 0, elf_flags; 864 int elf_prot = 0, elf_flags;
864 unsigned long k, vaddr; 865 unsigned long k, vaddr;
866 unsigned long total_size = 0;
865 867
866 if (elf_ppnt->p_type != PT_LOAD) 868 if (elf_ppnt->p_type != PT_LOAD)
867 continue; 869 continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
909 * default mmap base, as well as whatever program they 911 * default mmap base, as well as whatever program they
910 * might try to exec. This is because the brk will 912 * might try to exec. This is because the brk will
911 * follow the loader, and is not movable. */ 913 * follow the loader, and is not movable. */
912#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE 914 load_bias = ELF_ET_DYN_BASE - vaddr;
913 /* Memory randomization might have been switched off
914 * in runtime via sysctl or explicit setting of
915 * personality flags.
916 * If that is the case, retain the original non-zero
917 * load_bias value in order to establish proper
918 * non-randomized mappings.
919 */
920 if (current->flags & PF_RANDOMIZE) 915 if (current->flags & PF_RANDOMIZE)
921 load_bias = 0; 916 load_bias += arch_mmap_rnd();
922 else 917 load_bias = ELF_PAGESTART(load_bias);
923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 918 total_size = total_mapping_size(elf_phdata,
924#else 919 loc->elf_ex.e_phnum);
925 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 920 if (!total_size) {
926#endif 921 error = -EINVAL;
922 goto out_free_dentry;
923 }
927 } 924 }
928 925
929 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, 926 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
930 elf_prot, elf_flags, 0); 927 elf_prot, elf_flags, total_size);
931 if (BAD_ADDR(error)) { 928 if (BAD_ADDR(error)) {
932 retval = IS_ERR((void *)error) ? 929 retval = IS_ERR((void *)error) ?
933 PTR_ERR((void*)error) : -EINVAL; 930 PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
1053 current->mm->end_data = end_data; 1050 current->mm->end_data = end_data;
1054 current->mm->start_stack = bprm->p; 1051 current->mm->start_stack = bprm->p;
1055 1052
1056#ifdef arch_randomize_brk
1057 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { 1053 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1058 current->mm->brk = current->mm->start_brk = 1054 current->mm->brk = current->mm->start_brk =
1059 arch_randomize_brk(current->mm); 1055 arch_randomize_brk(current->mm);
1060#ifdef CONFIG_COMPAT_BRK 1056#ifdef compat_brk_randomized
1061 current->brk_randomized = 1; 1057 current->brk_randomized = 1;
1062#endif 1058#endif
1063 } 1059 }
1064#endif
1065 1060
1066 if (current->personality & MMAP_PAGE_ZERO) { 1061 if (current->personality & MMAP_PAGE_ZERO) {
1067 /* Why this, you ask??? Well SVr4 maps page 0 as read-only, 1062 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..2e522aed6584 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/cleancache.h> 29#include <linux/cleancache.h>
30#include <linux/aio.h>
31#include <asm/uaccess.h> 30#include <asm/uaccess.h>
32#include "internal.h" 31#include "internal.h"
33 32
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3387 3387
3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3389 struct btrfs_root *root); 3389 struct btrfs_root *root);
3390int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3391 struct btrfs_root *root);
3390int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 3392int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
3391int btrfs_free_block_groups(struct btrfs_fs_info *info); 3393int btrfs_free_block_groups(struct btrfs_fs_info *info);
3392int btrfs_read_block_groups(struct btrfs_root *root); 3394int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
3909 loff_t actual_len, u64 *alloc_hint); 3911 loff_t actual_len, u64 *alloc_hint);
3910int btrfs_inode_check_errors(struct inode *inode); 3912int btrfs_inode_check_errors(struct inode *inode);
3911extern const struct dentry_operations btrfs_dentry_operations; 3913extern const struct dentry_operations btrfs_dentry_operations;
3914#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3915void btrfs_test_inode_set_ops(struct inode *inode);
3916#endif
3912 3917
3913/* ioctl.c */ 3918/* ioctl.c */
3914long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3919long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3921 } 3921 }
3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) 3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
3923 + sizeof(struct btrfs_chunk)) { 3923 + sizeof(struct btrfs_chunk)) {
3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", 3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
3925 btrfs_super_sys_array_size(sb), 3925 btrfs_super_sys_array_size(sb),
3926 sizeof(struct btrfs_disk_key) 3926 sizeof(struct btrfs_disk_key)
3927 + sizeof(struct btrfs_chunk)); 3927 + sizeof(struct btrfs_chunk));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6f080451fcb1..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3325,6 +3325,32 @@ out:
3325 return ret; 3325 return ret;
3326} 3326}
3327 3327
3328int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root)
3330{
3331 struct btrfs_block_group_cache *cache, *tmp;
3332 struct btrfs_transaction *cur_trans = trans->transaction;
3333 struct btrfs_path *path;
3334
3335 if (list_empty(&cur_trans->dirty_bgs) ||
3336 !btrfs_test_opt(root, SPACE_CACHE))
3337 return 0;
3338
3339 path = btrfs_alloc_path();
3340 if (!path)
3341 return -ENOMEM;
3342
3343 /* Could add new block groups, use _safe just in case */
3344 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3345 dirty_list) {
3346 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3347 cache_save_setup(cache, trans, path);
3348 }
3349
3350 btrfs_free_path(path);
3351 return 0;
3352}
3353
3328int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3354int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root) 3355 struct btrfs_root *root)
3330{ 3356{
@@ -5110,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5110 num_bytes = ALIGN(num_bytes, root->sectorsize); 5136 num_bytes = ALIGN(num_bytes, root->sectorsize);
5111 5137
5112 spin_lock(&BTRFS_I(inode)->lock); 5138 spin_lock(&BTRFS_I(inode)->lock);
5113 BTRFS_I(inode)->outstanding_extents++; 5139 nr_extents = (unsigned)div64_u64(num_bytes +
5140 BTRFS_MAX_EXTENT_SIZE - 1,
5141 BTRFS_MAX_EXTENT_SIZE);
5142 BTRFS_I(inode)->outstanding_extents += nr_extents;
5143 nr_extents = 0;
5114 5144
5115 if (BTRFS_I(inode)->outstanding_extents > 5145 if (BTRFS_I(inode)->outstanding_extents >
5116 BTRFS_I(inode)->reserved_extents) 5146 BTRFS_I(inode)->reserved_extents)
@@ -5255,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5255 if (dropped > 0) 5285 if (dropped > 0)
5256 to_free += btrfs_calc_trans_metadata_size(root, dropped); 5286 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5257 5287
5288 if (btrfs_test_is_dummy_root(root))
5289 return;
5290
5258 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5291 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5259 btrfs_ino(inode), to_free, 0); 5292 btrfs_ino(inode), to_free, 0);
5260 if (root->fs_info->quota_enabled) { 5293 if (root->fs_info->quota_enabled) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
4968 4968
4969 /* Should be safe to release our pages at this point */ 4969 /* Should be safe to release our pages at this point */
4970 btrfs_release_extent_buffer_page(eb); 4970 btrfs_release_extent_buffer_page(eb);
4971#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4972 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4973 __free_extent_buffer(eb);
4974 return 1;
4975 }
4976#endif
4971 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4977 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4972 return 1; 4978 return 1;
4973 } 4979 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 30982bbd31c3..aee18f84e315 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/aio.h>
28#include <linux/falloc.h> 27#include <linux/falloc.h>
29#include <linux/swap.h> 28#include <linux/swap.h>
30#include <linux/writeback.h> 29#include <linux/writeback.h>
@@ -32,6 +31,7 @@
32#include <linux/compat.h> 31#include <linux/compat.h>
33#include <linux/slab.h> 32#include <linux/slab.h>
34#include <linux/btrfs.h> 33#include <linux/btrfs.h>
34#include <linux/uio.h>
35#include "ctree.h" 35#include "ctree.h"
36#include "disk-io.h" 36#include "disk-io.h"
37#include "transaction.h" 37#include "transaction.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da828cf5e8f8..686331f22b15 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/statfs.h> 33#include <linux/statfs.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include <linux/bit_spinlock.h> 35#include <linux/bit_spinlock.h>
37#include <linux/xattr.h> 36#include <linux/xattr.h>
38#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
43#include <linux/btrfs.h> 42#include <linux/btrfs.h>
44#include <linux/blkdev.h> 43#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h> 44#include <linux/posix_acl_xattr.h>
45#include <linux/uio.h>
46#include "ctree.h" 46#include "ctree.h"
47#include "disk-io.h" 47#include "disk-io.h"
48#include "transaction.h" 48#include "transaction.h"
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
108 108
109static int btrfs_dirty_inode(struct inode *inode); 109static int btrfs_dirty_inode(struct inode *inode);
110 110
111#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
112void btrfs_test_inode_set_ops(struct inode *inode)
113{
114 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
115}
116#endif
117
111static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 118static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 struct inode *inode, struct inode *dir, 119 struct inode *inode, struct inode *dir,
113 const struct qstr *qstr) 120 const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
1542 u64 new_size; 1549 u64 new_size;
1543 1550
1544 /* 1551 /*
1545 * We need the largest size of the remaining extent to see if we 1552 * See the explanation in btrfs_merge_extent_hook, the same
1546 * need to add a new outstanding extent. Think of the following 1553 * applies here, just in reverse.
1547 * case
1548 *
1549 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
1550 *
1551 * The new_size would just be 4k and we'd think we had enough
1552 * outstanding extents for this if we only took one side of the
1553 * split, same goes for the other direction. We need to see if
1554 * the larger size still is the same amount of extents as the
1555 * original size, because if it is we need to add a new
1556 * outstanding extent. But if we split up and the larger size
1557 * is less than the original then we are good to go since we've
1558 * already accounted for the extra extent in our original
1559 * accounting.
1560 */ 1554 */
1561 new_size = orig->end - split + 1; 1555 new_size = orig->end - split + 1;
1562 if ((split - orig->start) > new_size) 1556 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1563 new_size = split - orig->start;
1564
1565 num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1566 BTRFS_MAX_EXTENT_SIZE); 1557 BTRFS_MAX_EXTENT_SIZE);
1567 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1558 new_size = split - orig->start;
1568 BTRFS_MAX_EXTENT_SIZE) < num_extents) 1559 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1560 BTRFS_MAX_EXTENT_SIZE);
1561 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1562 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1569 return; 1563 return;
1570 } 1564 }
1571 1565
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1591 if (!(other->state & EXTENT_DELALLOC)) 1585 if (!(other->state & EXTENT_DELALLOC))
1592 return; 1586 return;
1593 1587
1594 old_size = other->end - other->start + 1; 1588 if (new->start > other->start)
1595 new_size = old_size + (new->end - new->start + 1); 1589 new_size = new->end - other->start + 1;
1590 else
1591 new_size = other->end - new->start + 1;
1596 1592
1597 /* we're not bigger than the max, unreserve the space and go */ 1593 /* we're not bigger than the max, unreserve the space and go */
1598 if (new_size <= BTRFS_MAX_EXTENT_SIZE) { 1594 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1603 } 1599 }
1604 1600
1605 /* 1601 /*
1606 * If we grew by another max_extent, just return, we want to keep that 1602 * We have to add up either side to figure out how many extents were
1607 * reserved amount. 1603 * accounted for before we merged into one big extent. If the number of
1604 * extents we accounted for is <= the amount we need for the new range
1605 * then we can return, otherwise drop. Think of it like this
1606 *
1607 * [ 4k][MAX_SIZE]
1608 *
1609 * So we've grown the extent by a MAX_SIZE extent, this would mean we
1610 * need 2 outstanding extents, on one side we have 1 and the other side
1611 * we have 1 so they are == and we can return. But in this case
1612 *
1613 * [MAX_SIZE+4k][MAX_SIZE+4k]
1614 *
1615 * Each range on their own accounts for 2 extents, but merged together
1616 * they are only 3 extents worth of accounting, so we need to drop in
1617 * this case.
1608 */ 1618 */
1619 old_size = other->end - other->start + 1;
1609 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, 1620 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1610 BTRFS_MAX_EXTENT_SIZE); 1621 BTRFS_MAX_EXTENT_SIZE);
1622 old_size = new->end - new->start + 1;
1623 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1624 BTRFS_MAX_EXTENT_SIZE);
1625
1611 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1626 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1612 BTRFS_MAX_EXTENT_SIZE) > num_extents) 1627 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1613 return; 1628 return;
1614 1629
1615 spin_lock(&BTRFS_I(inode)->lock); 1630 spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
1686 spin_unlock(&BTRFS_I(inode)->lock); 1701 spin_unlock(&BTRFS_I(inode)->lock);
1687 } 1702 }
1688 1703
1704 /* For sanity tests */
1705 if (btrfs_test_is_dummy_root(root))
1706 return;
1707
1689 __percpu_counter_add(&root->fs_info->delalloc_bytes, len, 1708 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1690 root->fs_info->delalloc_batch); 1709 root->fs_info->delalloc_batch);
1691 spin_lock(&BTRFS_I(inode)->lock); 1710 spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1741 root != root->fs_info->tree_root) 1760 root != root->fs_info->tree_root)
1742 btrfs_delalloc_release_metadata(inode, len); 1761 btrfs_delalloc_release_metadata(inode, len);
1743 1762
1763 /* For sanity tests. */
1764 if (btrfs_test_is_dummy_root(root))
1765 return;
1766
1744 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1767 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1745 && do_list && !(state->state & EXTENT_NORESERVE)) 1768 && do_list && !(state->state & EXTENT_NORESERVE))
1746 btrfs_free_reserved_data_space(inode, len); 1769 btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7213 u64 start = iblock << inode->i_blkbits; 7236 u64 start = iblock << inode->i_blkbits;
7214 u64 lockstart, lockend; 7237 u64 lockstart, lockend;
7215 u64 len = bh_result->b_size; 7238 u64 len = bh_result->b_size;
7216 u64 orig_len = len; 7239 u64 *outstanding_extents = NULL;
7217 int unlock_bits = EXTENT_LOCKED; 7240 int unlock_bits = EXTENT_LOCKED;
7218 int ret = 0; 7241 int ret = 0;
7219 7242
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7225 lockstart = start; 7248 lockstart = start;
7226 lockend = start + len - 1; 7249 lockend = start + len - 1;
7227 7250
7251 if (current->journal_info) {
7252 /*
7253 * Need to pull our outstanding extents and set journal_info to NULL so
7254 * that anything that needs to check if there's a transction doesn't get
7255 * confused.
7256 */
7257 outstanding_extents = current->journal_info;
7258 current->journal_info = NULL;
7259 }
7260
7228 /* 7261 /*
7229 * If this errors out it's because we couldn't invalidate pagecache for 7262 * If this errors out it's because we couldn't invalidate pagecache for
7230 * this range and we need to fallback to buffered. 7263 * this range and we need to fallback to buffered.
@@ -7348,11 +7381,20 @@ unlock:
7348 if (start + len > i_size_read(inode)) 7381 if (start + len > i_size_read(inode))
7349 i_size_write(inode, start + len); 7382 i_size_write(inode, start + len);
7350 7383
7351 if (len < orig_len) { 7384 /*
7385 * If we have an outstanding_extents count still set then we're
7386 * within our reservation, otherwise we need to adjust our inode
7387 * counter appropriately.
7388 */
7389 if (*outstanding_extents) {
7390 (*outstanding_extents)--;
7391 } else {
7352 spin_lock(&BTRFS_I(inode)->lock); 7392 spin_lock(&BTRFS_I(inode)->lock);
7353 BTRFS_I(inode)->outstanding_extents++; 7393 BTRFS_I(inode)->outstanding_extents++;
7354 spin_unlock(&BTRFS_I(inode)->lock); 7394 spin_unlock(&BTRFS_I(inode)->lock);
7355 } 7395 }
7396
7397 current->journal_info = outstanding_extents;
7356 btrfs_free_reserved_data_space(inode, len); 7398 btrfs_free_reserved_data_space(inode, len);
7357 } 7399 }
7358 7400
@@ -7376,6 +7418,8 @@ unlock:
7376unlock_err: 7418unlock_err:
7377 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7419 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7378 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7420 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7421 if (outstanding_extents)
7422 current->journal_info = outstanding_extents;
7379 return ret; 7423 return ret;
7380} 7424}
7381 7425
@@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8075{ 8119{
8076 struct file *file = iocb->ki_filp; 8120 struct file *file = iocb->ki_filp;
8077 struct inode *inode = file->f_mapping->host; 8121 struct inode *inode = file->f_mapping->host;
8122 u64 outstanding_extents = 0;
8078 size_t count = 0; 8123 size_t count = 0;
8079 int flags = 0; 8124 int flags = 0;
8080 bool wakeup = true; 8125 bool wakeup = true;
@@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8112 ret = btrfs_delalloc_reserve_space(inode, count); 8157 ret = btrfs_delalloc_reserve_space(inode, count);
8113 if (ret) 8158 if (ret)
8114 goto out; 8159 goto out;
8160 outstanding_extents = div64_u64(count +
8161 BTRFS_MAX_EXTENT_SIZE - 1,
8162 BTRFS_MAX_EXTENT_SIZE);
8163
8164 /*
8165 * We need to know how many extents we reserved so that we can
8166 * do the accounting properly if we go over the number we
8167 * originally calculated. Abuse current->journal_info for this.
8168 */
8169 current->journal_info = &outstanding_extents;
8115 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8170 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8116 &BTRFS_I(inode)->runtime_flags)) { 8171 &BTRFS_I(inode)->runtime_flags)) {
8117 inode_dio_done(inode); 8172 inode_dio_done(inode);
@@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8124 iter, offset, btrfs_get_blocks_direct, NULL, 8179 iter, offset, btrfs_get_blocks_direct, NULL,
8125 btrfs_submit_direct, flags); 8180 btrfs_submit_direct, flags);
8126 if (rw & WRITE) { 8181 if (rw & WRITE) {
8182 current->journal_info = NULL;
8127 if (ret < 0 && ret != -EIOCBQUEUED) 8183 if (ret < 0 && ret != -EIOCBQUEUED)
8128 btrfs_delalloc_release_space(inode, count); 8184 btrfs_delalloc_release_space(inode, count);
8129 else if (ret >= 0 && (size_t)ret < count) 8185 else if (ret >= 0 && (size_t)ret < count)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1259 if (oper1->seq < oper2->seq) 1259 if (oper1->seq < oper2->seq)
1260 return -1; 1260 return -1;
1261 if (oper1->seq > oper2->seq) 1261 if (oper1->seq > oper2->seq)
1262 return -1; 1262 return 1;
1263 if (oper1->ref_root < oper2->ref_root) 1263 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1264 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1265 if (oper1->ref_root > oper2->ref_root)
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
911 return ret; 911 return ret;
912} 912}
913 913
914static int test_extent_accounting(void)
915{
916 struct inode *inode = NULL;
917 struct btrfs_root *root = NULL;
918 int ret = -ENOMEM;
919
920 inode = btrfs_new_test_inode();
921 if (!inode) {
922 test_msg("Couldn't allocate inode\n");
923 return ret;
924 }
925
926 root = btrfs_alloc_dummy_root();
927 if (IS_ERR(root)) {
928 test_msg("Couldn't allocate root\n");
929 goto out;
930 }
931
932 root->fs_info = btrfs_alloc_dummy_fs_info();
933 if (!root->fs_info) {
934 test_msg("Couldn't allocate dummy fs info\n");
935 goto out;
936 }
937
938 BTRFS_I(inode)->root = root;
939 btrfs_test_inode_set_ops(inode);
940
941 /* [BTRFS_MAX_EXTENT_SIZE] */
942 BTRFS_I(inode)->outstanding_extents++;
943 ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
944 NULL);
945 if (ret) {
946 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
947 goto out;
948 }
949 if (BTRFS_I(inode)->outstanding_extents != 1) {
950 ret = -EINVAL;
951 test_msg("Miscount, wanted 1, got %u\n",
952 BTRFS_I(inode)->outstanding_extents);
953 goto out;
954 }
955
956 /* [BTRFS_MAX_EXTENT_SIZE][4k] */
957 BTRFS_I(inode)->outstanding_extents++;
958 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
959 BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
960 if (ret) {
961 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
962 goto out;
963 }
964 if (BTRFS_I(inode)->outstanding_extents != 2) {
965 ret = -EINVAL;
966 test_msg("Miscount, wanted 2, got %u\n",
967 BTRFS_I(inode)->outstanding_extents);
968 goto out;
969 }
970
971 /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
972 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
973 BTRFS_MAX_EXTENT_SIZE >> 1,
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS);
978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out;
981 }
982 if (BTRFS_I(inode)->outstanding_extents != 2) {
983 ret = -EINVAL;
984 test_msg("Miscount, wanted 2, got %u\n",
985 BTRFS_I(inode)->outstanding_extents);
986 goto out;
987 }
988
989 /* [BTRFS_MAX_EXTENT_SIZE][4K] */
990 BTRFS_I(inode)->outstanding_extents++;
991 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
992 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
993 NULL);
994 if (ret) {
995 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
996 goto out;
997 }
998 if (BTRFS_I(inode)->outstanding_extents != 2) {
999 ret = -EINVAL;
1000 test_msg("Miscount, wanted 2, got %u\n",
1001 BTRFS_I(inode)->outstanding_extents);
1002 goto out;
1003 }
1004
1005 /*
1006 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
1007 *
1008 * I'm artificially adding 2 to outstanding_extents because in the
1009 * buffered IO case we'd add things up as we go, but I don't feel like
1010 * doing that here, this isn't the interesting case we want to test.
1011 */
1012 BTRFS_I(inode)->outstanding_extents += 2;
1013 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
1014 (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
1015 NULL);
1016 if (ret) {
1017 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1018 goto out;
1019 }
1020 if (BTRFS_I(inode)->outstanding_extents != 4) {
1021 ret = -EINVAL;
1022 test_msg("Miscount, wanted 4, got %u\n",
1023 BTRFS_I(inode)->outstanding_extents);
1024 goto out;
1025 }
1026
1027 /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
1028 BTRFS_I(inode)->outstanding_extents++;
1029 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1030 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1031 if (ret) {
1032 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1033 goto out;
1034 }
1035 if (BTRFS_I(inode)->outstanding_extents != 3) {
1036 ret = -EINVAL;
1037 test_msg("Miscount, wanted 3, got %u\n",
1038 BTRFS_I(inode)->outstanding_extents);
1039 goto out;
1040 }
1041
1042 /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
1043 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
1044 BTRFS_MAX_EXTENT_SIZE+4096,
1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS);
1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out;
1052 }
1053 if (BTRFS_I(inode)->outstanding_extents != 4) {
1054 ret = -EINVAL;
1055 test_msg("Miscount, wanted 4, got %u\n",
1056 BTRFS_I(inode)->outstanding_extents);
1057 goto out;
1058 }
1059
1060 /*
1061 * Refill the hole again just for good measure, because I thought it
1062 * might fail and I'd rather satisfy my paranoia at this point.
1063 */
1064 BTRFS_I(inode)->outstanding_extents++;
1065 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1066 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1067 if (ret) {
1068 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1069 goto out;
1070 }
1071 if (BTRFS_I(inode)->outstanding_extents != 3) {
1072 ret = -EINVAL;
1073 test_msg("Miscount, wanted 3, got %u\n",
1074 BTRFS_I(inode)->outstanding_extents);
1075 goto out;
1076 }
1077
1078 /* Empty */
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS);
1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out;
1086 }
1087 if (BTRFS_I(inode)->outstanding_extents) {
1088 ret = -EINVAL;
1089 test_msg("Miscount, wanted 0, got %u\n",
1090 BTRFS_I(inode)->outstanding_extents);
1091 goto out;
1092 }
1093 ret = 0;
1094out:
1095 if (ret)
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS);
1100 iput(inode);
1101 btrfs_free_dummy_root(root);
1102 return ret;
1103}
1104
914int btrfs_test_inodes(void) 1105int btrfs_test_inodes(void)
915{ 1106{
916 int ret; 1107 int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
924 if (ret) 1115 if (ret)
925 return ret; 1116 return ret;
926 test_msg("Running hole first btrfs_get_extent test\n"); 1117 test_msg("Running hole first btrfs_get_extent test\n");
927 return test_hole_first(); 1118 ret = test_hole_first();
1119 if (ret)
1120 return ret;
1121 test_msg("Running outstanding_extents tests\n");
1122 return test_extent_accounting();
928} 1123}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 88e51aded6bd..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1023 u64 old_root_bytenr; 1023 u64 old_root_bytenr;
1024 u64 old_root_used; 1024 u64 old_root_used;
1025 struct btrfs_root *tree_root = root->fs_info->tree_root; 1025 struct btrfs_root *tree_root = root->fs_info->tree_root;
1026 bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
1027 1026
1028 old_root_used = btrfs_root_used(&root->root_item); 1027 old_root_used = btrfs_root_used(&root->root_item);
1029 btrfs_write_dirty_block_groups(trans, root);
1030 1028
1031 while (1) { 1029 while (1) {
1032 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1030 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1033 if (old_root_bytenr == root->node->start && 1031 if (old_root_bytenr == root->node->start &&
1034 old_root_used == btrfs_root_used(&root->root_item) && 1032 old_root_used == btrfs_root_used(&root->root_item))
1035 (!extent_root ||
1036 list_empty(&trans->transaction->dirty_bgs)))
1037 break; 1033 break;
1038 1034
1039 btrfs_set_root_node(&root->root_item, root->node); 1035 btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,14 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1044 return ret; 1040 return ret;
1045 1041
1046 old_root_used = btrfs_root_used(&root->root_item); 1042 old_root_used = btrfs_root_used(&root->root_item);
1047 if (extent_root) {
1048 ret = btrfs_write_dirty_block_groups(trans, root);
1049 if (ret)
1050 return ret;
1051 }
1052 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1053 if (ret)
1054 return ret;
1055 } 1043 }
1056 1044
1057 return 0; 1045 return 0;
@@ -1068,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1068 struct btrfs_root *root) 1056 struct btrfs_root *root)
1069{ 1057{
1070 struct btrfs_fs_info *fs_info = root->fs_info; 1058 struct btrfs_fs_info *fs_info = root->fs_info;
1059 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1071 struct list_head *next; 1060 struct list_head *next;
1072 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1073 int ret; 1062 int ret;
@@ -1095,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1095 if (ret) 1084 if (ret)
1096 return ret; 1085 return ret;
1097 1086
1087 ret = btrfs_setup_space_cache(trans, root);
1088 if (ret)
1089 return ret;
1090
1098 /* run_qgroups might have added some more refs */ 1091 /* run_qgroups might have added some more refs */
1099 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1092 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1100 if (ret) 1093 if (ret)
1101 return ret; 1094 return ret;
1102 1095again:
1103 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 1096 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
1104 next = fs_info->dirty_cowonly_roots.next; 1097 next = fs_info->dirty_cowonly_roots.next;
1105 list_del_init(next); 1098 list_del_init(next);
@@ -1112,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1112 ret = update_cowonly_root(trans, root); 1105 ret = update_cowonly_root(trans, root);
1113 if (ret) 1106 if (ret)
1114 return ret; 1107 return ret;
1108 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1109 if (ret)
1110 return ret;
1115 } 1111 }
1116 1112
1113 while (!list_empty(dirty_bgs)) {
1114 ret = btrfs_write_dirty_block_groups(trans, root);
1115 if (ret)
1116 return ret;
1117 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1118 if (ret)
1119 return ret;
1120 }
1121
1122 if (!list_empty(&fs_info->dirty_cowonly_roots))
1123 goto again;
1124
1117 list_add_tail(&fs_info->extent_root->dirty_list, 1125 list_add_tail(&fs_info->extent_root->dirty_list,
1118 &trans->transaction->switch_commits); 1126 &trans->transaction->switch_commits);
1119 btrfs_after_dev_replace_commit(fs_info); 1127 btrfs_after_dev_replace_commit(fs_info);
@@ -1811,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1811 1819
1812 wait_for_commit(root, cur_trans); 1820 wait_for_commit(root, cur_trans);
1813 1821
1822 if (unlikely(cur_trans->aborted))
1823 ret = cur_trans->aborted;
1824
1814 btrfs_put_transaction(cur_trans); 1825 btrfs_put_transaction(cur_trans);
1815 1826
1816 return ret; 1827 return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
3243 * to synchronise against __set_page_dirty_buffers and prevent the 3243 * to synchronise against __set_page_dirty_buffers and prevent the
3244 * dirty bit from being lost. 3244 * dirty bit from being lost.
3245 */ 3245 */
3246 if (ret) 3246 if (ret && TestClearPageDirty(page))
3247 cancel_dirty_page(page, PAGE_CACHE_SIZE); 3247 account_page_cleaned(page, mapping);
3248 spin_unlock(&mapping->private_lock); 3248 spin_unlock(&mapping->private_lock);
3249out: 3249out:
3250 if (buffers_to_free) { 3250 if (buffers_to_free) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075a823d..139f2fea91a0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include <linux/namei.h> 8#include <linux/namei.h>
9#include <linux/writeback.h> 9#include <linux/writeback.h>
10#include <linux/aio.h>
11#include <linux/falloc.h> 10#include <linux/falloc.h>
12 11
13#include "super.h" 12#include "super.h"
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
808{ 807{
809 struct file *filp = iocb->ki_filp; 808 struct file *filp = iocb->ki_filp;
810 struct ceph_file_info *fi = filp->private_data; 809 struct ceph_file_info *fi = filp->private_data;
811 size_t len = iocb->ki_nbytes; 810 size_t len = iov_iter_count(to);
812 struct inode *inode = file_inode(filp); 811 struct inode *inode = file_inode(filp);
813 struct ceph_inode_info *ci = ceph_inode(inode); 812 struct ceph_inode_info *ci = ceph_inode(inode);
814 struct page *pinned_page = NULL; 813 struct page *pinned_page = NULL;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ac7445e6ec7..aa0dc2573374 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * fs/cifs/cifsencrypt.c 2 * fs/cifs/cifsencrypt.c
3 * 3 *
4 * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP
5 * for more detailed information
6 *
4 * Copyright (C) International Business Machines Corp., 2005,2013 7 * Copyright (C) International Business Machines Corp., 2005,2013
5 * Author(s): Steve French (sfrench@us.ibm.com) 8 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 9 *
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
515 __func__); 518 __func__);
516 return rc; 519 return rc;
517 } 520 }
518 } else if (ses->serverName) { 521 } else {
522 /* We use ses->serverName if no domain name available */
519 len = strlen(ses->serverName); 523 len = strlen(ses->serverName);
520 524
521 server = kmalloc(2 + (len * 2), GFP_KERNEL); 525 server = kmalloc(2 + (len * 2), GFP_KERNEL);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3aa999ab785..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
773 773
774 length = atomic_dec_return(&tcpSesAllocCount); 774 length = atomic_dec_return(&tcpSesAllocCount);
775 if (length > 0) 775 if (length > 0)
776 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 776 mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
777 GFP_KERNEL);
778} 777}
779 778
780static int 779static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
848 847
849 length = atomic_inc_return(&tcpSesAllocCount); 848 length = atomic_inc_return(&tcpSesAllocCount);
850 if (length > 1) 849 if (length > 1)
851 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 850 mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
852 GFP_KERNEL);
853 851
854 set_freezable(); 852 set_freezable();
855 while (server->tcpStatus != CifsExiting) { 853 while (server->tcpStatus != CifsExiting) {
@@ -1599,6 +1597,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1599 pr_warn("CIFS: username too long\n"); 1597 pr_warn("CIFS: username too long\n");
1600 goto cifs_parse_mount_err; 1598 goto cifs_parse_mount_err;
1601 } 1599 }
1600
1601 kfree(vol->username);
1602 vol->username = kstrdup(string, GFP_KERNEL); 1602 vol->username = kstrdup(string, GFP_KERNEL);
1603 if (!vol->username) 1603 if (!vol->username)
1604 goto cifs_parse_mount_err; 1604 goto cifs_parse_mount_err;
@@ -1700,6 +1700,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1700 goto cifs_parse_mount_err; 1700 goto cifs_parse_mount_err;
1701 } 1701 }
1702 1702
1703 kfree(vol->domainname);
1703 vol->domainname = kstrdup(string, GFP_KERNEL); 1704 vol->domainname = kstrdup(string, GFP_KERNEL);
1704 if (!vol->domainname) { 1705 if (!vol->domainname) {
1705 pr_warn("CIFS: no memory for domainname\n"); 1706 pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1731 } 1732 }
1732 1733
1733 if (strncasecmp(string, "default", 7) != 0) { 1734 if (strncasecmp(string, "default", 7) != 0) {
1735 kfree(vol->iocharset);
1734 vol->iocharset = kstrdup(string, 1736 vol->iocharset = kstrdup(string,
1735 GFP_KERNEL); 1737 GFP_KERNEL);
1736 if (!vol->iocharset) { 1738 if (!vol->iocharset) {
@@ -2913,8 +2915,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2913 * calling name ends in null (byte 16) from old smb 2915 * calling name ends in null (byte 16) from old smb
2914 * convention. 2916 * convention.
2915 */ 2917 */
2916 if (server->workstation_RFC1001_name && 2918 if (server->workstation_RFC1001_name[0] != 0)
2917 server->workstation_RFC1001_name[0] != 0)
2918 rfc1002mangle(ses_init_buf->trailer. 2919 rfc1002mangle(ses_init_buf->trailer.
2919 session_req.calling_name, 2920 session_req.calling_name,
2920 server->workstation_RFC1001_name, 2921 server->workstation_RFC1001_name,
@@ -3692,6 +3693,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
3692#endif /* CIFS_WEAK_PW_HASH */ 3693#endif /* CIFS_WEAK_PW_HASH */
3693 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, 3694 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
3694 bcc_ptr, nls_codepage); 3695 bcc_ptr, nls_codepage);
3696 if (rc) {
3697 cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
3698 __func__, rc);
3699 cifs_buf_release(smb_buffer);
3700 return rc;
3701 }
3695 3702
3696 bcc_ptr += CIFS_AUTH_RESP_SIZE; 3703 bcc_ptr += CIFS_AUTH_RESP_SIZE;
3697 if (ses->capabilities & CAP_UNICODE) { 3704 if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a94b3e673182..ca30c391a894 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1823,6 +1823,7 @@ refind_writable:
1823 cifsFileInfo_put(inv_file); 1823 cifsFileInfo_put(inv_file);
1824 spin_lock(&cifs_file_list_lock); 1824 spin_lock(&cifs_file_list_lock);
1825 ++refind; 1825 ++refind;
1826 inv_file = NULL;
1826 goto refind_writable; 1827 goto refind_writable;
1827 } 1828 }
1828 } 1829 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2d4f37235ed0..3e126d7bb2ea 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
771 cifs_buf_release(srchinf->ntwrk_buf_start); 771 cifs_buf_release(srchinf->ntwrk_buf_start);
772 } 772 }
773 kfree(srchinf); 773 kfree(srchinf);
774 if (rc)
775 goto cgii_exit;
774 } else 776 } else
775 goto cgii_exit; 777 goto cgii_exit;
776 778
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 689f035915cf..22dfdf17d065 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
322 322
323 /* return pointer to beginning of data area, ie offset from SMB start */ 323 /* return pointer to beginning of data area, ie offset from SMB start */
324 if ((*off != 0) && (*len != 0)) 324 if ((*off != 0) && (*len != 0))
325 return hdr->ProtocolId + *off; 325 return (char *)(&hdr->ProtocolId[0]) + *off;
326 else 326 else
327 return NULL; 327 return NULL;
328} 328}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 96b5d40a2ece..eab05e1aa587 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid,
684 684
685 /* No need to change MaxChunks since already set to 1 */ 685 /* No need to change MaxChunks since already set to 1 */
686 chunk_sizes_updated = true; 686 chunk_sizes_updated = true;
687 } 687 } else
688 goto cchunk_out;
688 } 689 }
689 690
690cchunk_out: 691cchunk_out:
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3417340bf89e..65cd7a84c8bc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1218 struct smb2_ioctl_req *req; 1218 struct smb2_ioctl_req *req;
1219 struct smb2_ioctl_rsp *rsp; 1219 struct smb2_ioctl_rsp *rsp;
1220 struct TCP_Server_Info *server; 1220 struct TCP_Server_Info *server;
1221 struct cifs_ses *ses = tcon->ses; 1221 struct cifs_ses *ses;
1222 struct kvec iov[2]; 1222 struct kvec iov[2];
1223 int resp_buftype; 1223 int resp_buftype;
1224 int num_iovecs; 1224 int num_iovecs;
@@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1233 if (plen) 1233 if (plen)
1234 *plen = 0; 1234 *plen = 0;
1235 1235
1236 if (tcon)
1237 ses = tcon->ses;
1238 else
1239 return -EIO;
1240
1236 if (ses && (ses->server)) 1241 if (ses && (ses->server))
1237 server = ses->server; 1242 server = ses->server;
1238 else 1243 else
@@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1296 rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; 1301 rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
1297 1302
1298 if ((rc != 0) && (rc != -EINVAL)) { 1303 if ((rc != 0) && (rc != -EINVAL)) {
1299 if (tcon) 1304 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1300 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1301 goto ioctl_exit; 1305 goto ioctl_exit;
1302 } else if (rc == -EINVAL) { 1306 } else if (rc == -EINVAL) {
1303 if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && 1307 if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
1304 (opcode != FSCTL_SRV_COPYCHUNK)) { 1308 (opcode != FSCTL_SRV_COPYCHUNK)) {
1305 if (tcon) 1309 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1306 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1307 goto ioctl_exit; 1310 goto ioctl_exit;
1308 } 1311 }
1309 } 1312 }
@@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1629 1632
1630 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); 1633 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
1631 1634
1632 if ((rc != 0) && tcon) 1635 if (rc != 0)
1633 cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); 1636 cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
1634 1637
1635 free_rsp_buf(resp_buftype, iov[0].iov_base); 1638 free_rsp_buf(resp_buftype, iov[0].iov_base);
@@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
2114 struct kvec iov[2]; 2117 struct kvec iov[2];
2115 int rc = 0; 2118 int rc = 0;
2116 int len; 2119 int len;
2117 int resp_buftype; 2120 int resp_buftype = CIFS_NO_BUFFER;
2118 unsigned char *bufptr; 2121 unsigned char *bufptr;
2119 struct TCP_Server_Info *server; 2122 struct TCP_Server_Info *server;
2120 struct cifs_ses *ses = tcon->ses; 2123 struct cifs_ses *ses = tcon->ses;
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e3732e53b..d99736a63e3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode,
2690 struct dentry *dentry, struct dentry *alias) 2690 struct dentry *dentry, struct dentry *alias)
2691{ 2691{
2692 struct mutex *m1 = NULL, *m2 = NULL; 2692 struct mutex *m1 = NULL, *m2 = NULL;
2693 int ret = -EBUSY; 2693 int ret = -ESTALE;
2694 2694
2695 /* If alias and dentry share a parent, then no extra locks required */ 2695 /* If alias and dentry share a parent, then no extra locks required */
2696 if (alias->d_parent == dentry->d_parent) 2696 if (alias->d_parent == dentry->d_parent)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 96400ab42d13..61e72d44cf94 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
254 254
255 pr_debug("debugfs: creating file '%s'\n",name); 255 pr_debug("debugfs: creating file '%s'\n",name);
256 256
257 if (IS_ERR(parent))
258 return parent;
259
257 error = simple_pin_fs(&debug_fs_type, &debugfs_mount, 260 error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
258 &debugfs_mount_count); 261 &debugfs_mount_count);
259 if (error) 262 if (error)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b2e297..6fb00e3f1059 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/atomic.h> 38#include <linux/atomic.h>
39#include <linux/prefetch.h> 39#include <linux/prefetch.h>
40#include <linux/aio.h>
41 40
42/* 41/*
43 * How many user pages to map in one call to get_user_pages(). This determines 42 * How many user pages to map in one call to get_user_pages(). This determines
@@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
265 ret = err; 264 ret = err;
266 } 265 }
267 266
268 aio_complete(dio->iocb, ret, 0); 267 dio->iocb->ki_complete(dio->iocb, ret, 0);
269 } 268 }
270 269
271 kmem_cache_free(dio_cache, dio); 270 kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
1056 * operation. AIO can if it was a broken operation described above or 1055 * operation. AIO can if it was a broken operation described above or
1057 * in fact if all the bios race to complete before we get here. In 1056 * in fact if all the bios race to complete before we get here. In
1058 * that case dio_complete() translates the EIOCBQUEUED into the proper 1057 * that case dio_complete() translates the EIOCBQUEUED into the proper
1059 * return code that the caller will hand to aio_complete(). 1058 * return code that the caller will hand to ->complete().
1060 * 1059 *
1061 * This is managed by the bio_lock instead of being an atomic_t so that 1060 * This is managed by the bio_lock instead of being an atomic_t so that
1062 * completion paths can drop their ref and use the remaining count to 1061 * completion paths can drop their ref and use the remaining count to
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index fd39bad6f1bd..79675089443d 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/compat.h> 32#include <linux/compat.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <linux/aio.h>
35#include "ecryptfs_kernel.h" 34#include "ecryptfs_kernel.h"
36 35
37/** 36/**
@@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
52 struct file *file = iocb->ki_filp; 51 struct file *file = iocb->ki_filp;
53 52
54 rc = generic_file_read_iter(iocb, to); 53 rc = generic_file_read_iter(iocb, to);
55 /*
56 * Even though this is a async interface, we need to wait
57 * for IO to finish to update atime
58 */
59 if (-EIOCBQUEUED == rc)
60 rc = wait_on_sync_kiocb(iocb);
61 if (rc >= 0) { 54 if (rc >= 0) {
62 path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); 55 path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
63 touch_atime(path); 56 touch_atime(path);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc000125..df9d6afbc5d5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
31#include <linux/mpage.h> 31#include <linux/mpage.h>
32#include <linux/fiemap.h> 32#include <linux/fiemap.h>
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/aio.h> 34#include <linux/uio.h>
35#include "ext2.h" 35#include "ext2.h"
36#include "acl.h" 36#include "acl.h"
37#include "xattr.h" 37#include "xattr.h"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc49ba27..db07ffbe7c85 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/mpage.h> 28#include <linux/mpage.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/aio.h> 30#include <linux/uio.h>
31#include "ext3.h" 31#include "ext3.h"
32#include "xattr.h" 32#include "xattr.h"
33#include "acl.h" 33#include "acl.h"
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da16c9c..598abbbe6786 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,9 +23,9 @@
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/path.h> 25#include <linux/path.h>
26#include <linux/aio.h>
27#include <linux/quotaops.h> 26#include <linux/quotaops.h>
28#include <linux/pagevec.h> 27#include <linux/pagevec.h>
28#include <linux/uio.h>
29#include "ext4.h" 29#include "ext4.h"
30#include "ext4_jbd2.h" 30#include "ext4_jbd2.h"
31#include "xattr.h" 31#include "xattr.h"
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 45fe924f82bc..740c7871c117 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,9 +20,9 @@
20 * (sct@redhat.com), 1993, 1998 20 * (sct@redhat.com), 1993, 1998
21 */ 21 */
22 22
23#include <linux/aio.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "truncate.h" 24#include "truncate.h"
25#include <linux/uio.h>
26 26
27#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
28 28
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cb9a212b86f..a3f451370bef 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,7 +37,6 @@
37#include <linux/printk.h> 37#include <linux/printk.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include <linux/aio.h>
41#include <linux/bitops.h> 40#include <linux/bitops.h>
42 41
43#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a2541a9ba..464984261e69 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -18,7 +18,6 @@
18#include <linux/pagevec.h> 18#include <linux/pagevec.h>
19#include <linux/mpage.h> 19#include <linux/mpage.h>
20#include <linux/namei.h> 20#include <linux/namei.h>
21#include <linux/aio.h>
22#include <linux/uio.h> 21#include <linux/uio.h>
23#include <linux/bio.h> 22#include <linux/bio.h>
24#include <linux/workqueue.h> 23#include <linux/workqueue.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed023a750..497f8515d205 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/mpage.h> 14#include <linux/mpage.h>
15#include <linux/aio.h>
16#include <linux/writeback.h> 15#include <linux/writeback.h>
17#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
18#include <linux/blkdev.h> 17#include <linux/blkdev.h>
19#include <linux/bio.h> 18#include <linux/bio.h>
20#include <linux/prefetch.h> 19#include <linux/prefetch.h>
20#include <linux/uio.h>
21 21
22#include "f2fs.h" 22#include "f2fs.h"
23#include "node.h" 23#include "node.h"
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5263c7..8521207de229 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -19,7 +19,6 @@
19#include <linux/mpage.h> 19#include <linux/mpage.h>
20#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/aio.h>
23#include <linux/vfs.h> 22#include <linux/vfs.h>
24#include <linux/parser.h> 23#include <linux/parser.h>
25#include <linux/uio.h> 24#include <linux/uio.h>
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
53 struct completion *done; /* set if the caller waits */ 53 struct completion *done; /* set if the caller waits */
54}; 54};
55 55
56/*
57 * If an inode is constantly having its pages dirtied, but then the
58 * updates stop dirtytime_expire_interval seconds in the past, it's
59 * possible for the worst case time between when an inode has its
60 * timestamps updated and when they finally get written out to be two
61 * dirtytime_expire_intervals. We set the default to 12 hours (in
62 * seconds), which means most of the time inodes will have their
63 * timestamps written to disk after 12 hours, but in the worst case a
 64 * few inodes might not have their timestamps updated for 24 hours.
65 */
66unsigned int dirtytime_expire_interval = 12 * 60 * 60;
67
56/** 68/**
57 * writeback_in_progress - determine whether there is writeback in progress 69 * writeback_in_progress - determine whether there is writeback in progress
58 * @bdi: the device's backing_dev_info structure. 70 * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
275 287
276 if ((flags & EXPIRE_DIRTY_ATIME) == 0) 288 if ((flags & EXPIRE_DIRTY_ATIME) == 0)
277 older_than_this = work->older_than_this; 289 older_than_this = work->older_than_this;
278 else if ((work->reason == WB_REASON_SYNC) == 0) { 290 else if (!work->for_sync) {
279 expire_time = jiffies - (HZ * 86400); 291 expire_time = jiffies - (dirtytime_expire_interval * HZ);
280 older_than_this = &expire_time; 292 older_than_this = &expire_time;
281 } 293 }
282 while (!list_empty(delaying_queue)) { 294 while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
458 */ 470 */
459 redirty_tail(inode, wb); 471 redirty_tail(inode, wb);
460 } else if (inode->i_state & I_DIRTY_TIME) { 472 } else if (inode->i_state & I_DIRTY_TIME) {
473 inode->dirtied_when = jiffies;
461 list_move(&inode->i_wb_list, &wb->b_dirty_time); 474 list_move(&inode->i_wb_list, &wb->b_dirty_time);
462 } else { 475 } else {
463 /* The inode is clean. Remove from writeback lists. */ 476 /* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
505 spin_lock(&inode->i_lock); 518 spin_lock(&inode->i_lock);
506 519
507 dirty = inode->i_state & I_DIRTY; 520 dirty = inode->i_state & I_DIRTY;
508 if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && 521 if (inode->i_state & I_DIRTY_TIME) {
509 (inode->i_state & I_DIRTY_TIME)) || 522 if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
510 (inode->i_state & I_DIRTY_TIME_EXPIRED)) { 523 unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
511 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; 524 unlikely(time_after(jiffies,
512 trace_writeback_lazytime(inode); 525 (inode->dirtied_time_when +
513 } 526 dirtytime_expire_interval * HZ)))) {
527 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
528 trace_writeback_lazytime(inode);
529 }
530 } else
531 inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
514 inode->i_state &= ~dirty; 532 inode->i_state &= ~dirty;
515 533
516 /* 534 /*
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1131 rcu_read_unlock(); 1149 rcu_read_unlock();
1132} 1150}
1133 1151
1152/*
 1153 * Wake up bdi's periodically to make sure dirtytime inodes get
1154 * written back periodically. We deliberately do *not* check the
1155 * b_dirtytime list in wb_has_dirty_io(), since this would cause the
1156 * kernel to be constantly waking up once there are any dirtytime
1157 * inodes on the system. So instead we define a separate delayed work
1158 * function which gets called much more rarely. (By default, only
1159 * once every 12 hours.)
1160 *
1161 * If there is any other write activity going on in the file system,
1162 * this function won't be necessary. But if the only thing that has
1163 * happened on the file system is a dirtytime inode caused by an atime
1164 * update, we need this infrastructure below to make sure that inode
1165 * eventually gets pushed out to disk.
1166 */
1167static void wakeup_dirtytime_writeback(struct work_struct *w);
1168static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
1169
1170static void wakeup_dirtytime_writeback(struct work_struct *w)
1171{
1172 struct backing_dev_info *bdi;
1173
1174 rcu_read_lock();
1175 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1176 if (list_empty(&bdi->wb.b_dirty_time))
1177 continue;
1178 bdi_wakeup_thread(bdi);
1179 }
1180 rcu_read_unlock();
1181 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1182}
1183
1184static int __init start_dirtytime_writeback(void)
1185{
1186 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1187 return 0;
1188}
1189__initcall(start_dirtytime_writeback);
1190
1191int dirtytime_interval_handler(struct ctl_table *table, int write,
1192 void __user *buffer, size_t *lenp, loff_t *ppos)
1193{
1194 int ret;
1195
1196 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1197 if (ret == 0 && write)
1198 mod_delayed_work(system_wq, &dirtytime_work, 0);
1199 return ret;
1200}
1201
1134static noinline void block_dump___mark_inode_dirty(struct inode *inode) 1202static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1135{ 1203{
1136 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 1204 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1269 } 1337 }
1270 1338
1271 inode->dirtied_when = jiffies; 1339 inode->dirtied_when = jiffies;
1272 list_move(&inode->i_wb_list, dirtytime ? 1340 if (dirtytime)
1273 &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); 1341 inode->dirtied_time_when = jiffies;
1342 if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
1343 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1344 else
1345 list_move(&inode->i_wb_list,
1346 &bdi->wb.b_dirty_time);
1274 spin_unlock(&bdi->wb.list_lock); 1347 spin_unlock(&bdi->wb.list_lock);
1275 trace_writeback_dirty_inode_enqueue(inode); 1348 trace_writeback_dirty_inode_enqueue(inode);
1276 1349
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7abba1c..b3fa05032234 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
38#include <linux/device.h> 38#include <linux/device.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#include <linux/fs.h> 40#include <linux/fs.h>
41#include <linux/aio.h>
42#include <linux/kdev_t.h> 41#include <linux/kdev_t.h>
43#include <linux/kthread.h> 42#include <linux/kthread.h>
44#include <linux/list.h> 43#include <linux/list.h>
@@ -48,6 +47,7 @@
48#include <linux/slab.h> 47#include <linux/slab.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/uio.h>
51 51
52#include "fuse_i.h" 52#include "fuse_i.h"
53 53
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d622fa..95a2797eef66 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
19#include <linux/pipe_fs_i.h> 19#include <linux/pipe_fs_i.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/splice.h> 21#include <linux/splice.h>
22#include <linux/aio.h>
23 22
24MODULE_ALIAS_MISCDEV(FUSE_MINOR); 23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
25MODULE_ALIAS("devname:fuse"); 24MODULE_ALIAS("devname:fuse");
@@ -890,8 +889,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
890 889
891 newpage = buf->page; 890 newpage = buf->page;
892 891
893 if (WARN_ON(!PageUptodate(newpage))) 892 if (!PageUptodate(newpage))
894 return -EIO; 893 SetPageUptodate(newpage);
895 894
896 ClearPageMappedToDisk(newpage); 895 ClearPageMappedToDisk(newpage);
897 896
@@ -1353,6 +1352,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1353 return err; 1352 return err;
1354} 1353}
1355 1354
1355static int fuse_dev_open(struct inode *inode, struct file *file)
1356{
1357 /*
1358 * The fuse device's file's private_data is used to hold
1359 * the fuse_conn(ection) when it is mounted, and is used to
1360 * keep track of whether the file has been mounted already.
1361 */
1362 file->private_data = NULL;
1363 return 0;
1364}
1365
1356static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, 1366static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1357 unsigned long nr_segs, loff_t pos) 1367 unsigned long nr_segs, loff_t pos)
1358{ 1368{
@@ -1797,6 +1807,9 @@ copy_finish:
1797static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 1807static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1798 unsigned int size, struct fuse_copy_state *cs) 1808 unsigned int size, struct fuse_copy_state *cs)
1799{ 1809{
1810 /* Don't try to move pages (yet) */
1811 cs->move_pages = 0;
1812
1800 switch (code) { 1813 switch (code) {
1801 case FUSE_NOTIFY_POLL: 1814 case FUSE_NOTIFY_POLL:
1802 return fuse_notify_poll(fc, size, cs); 1815 return fuse_notify_poll(fc, size, cs);
@@ -2217,6 +2230,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
2217 2230
2218const struct file_operations fuse_dev_operations = { 2231const struct file_operations fuse_dev_operations = {
2219 .owner = THIS_MODULE, 2232 .owner = THIS_MODULE,
2233 .open = fuse_dev_open,
2220 .llseek = no_llseek, 2234 .llseek = no_llseek,
2221 .read = do_sync_read, 2235 .read = do_sync_read,
2222 .aio_read = fuse_dev_read, 2236 .aio_read = fuse_dev_read,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3bdcfd8..ff102cbf16ea 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/aio.h>
19#include <linux/falloc.h> 18#include <linux/falloc.h>
19#include <linux/uio.h>
20 20
21static const struct file_operations fuse_direct_io_file_operations; 21static const struct file_operations fuse_direct_io_file_operations;
22 22
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
528 } 528 }
529} 529}
530 530
531static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
532{
533 if (io->err)
534 return io->err;
535
536 if (io->bytes >= 0 && io->write)
537 return -EIO;
538
539 return io->bytes < 0 ? io->size : io->bytes;
540}
541
531/** 542/**
532 * In case of short read, the caller sets 'pos' to the position of 543 * In case of short read, the caller sets 'pos' to the position of
533 * actual end of fuse request in IO request. Otherwise, if bytes_requested 544 * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
546 */ 557 */
547static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) 558static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
548{ 559{
560 bool is_sync = is_sync_kiocb(io->iocb);
549 int left; 561 int left;
550 562
551 spin_lock(&io->lock); 563 spin_lock(&io->lock);
@@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
555 io->bytes = pos; 567 io->bytes = pos;
556 568
557 left = --io->reqs; 569 left = --io->reqs;
570 if (!left && is_sync)
571 complete(io->done);
558 spin_unlock(&io->lock); 572 spin_unlock(&io->lock);
559 573
560 if (!left) { 574 if (!left && !is_sync) {
561 long res; 575 ssize_t res = fuse_get_res_by_io(io);
562 576
563 if (io->err) 577 if (res >= 0) {
564 res = io->err; 578 struct inode *inode = file_inode(io->iocb->ki_filp);
565 else if (io->bytes >= 0 && io->write) 579 struct fuse_conn *fc = get_fuse_conn(inode);
566 res = -EIO; 580 struct fuse_inode *fi = get_fuse_inode(inode);
567 else {
568 res = io->bytes < 0 ? io->size : io->bytes;
569 581
570 if (!is_sync_kiocb(io->iocb)) { 582 spin_lock(&fc->lock);
571 struct inode *inode = file_inode(io->iocb->ki_filp); 583 fi->attr_version = ++fc->attr_version;
572 struct fuse_conn *fc = get_fuse_conn(inode); 584 spin_unlock(&fc->lock);
573 struct fuse_inode *fi = get_fuse_inode(inode);
574
575 spin_lock(&fc->lock);
576 fi->attr_version = ++fc->attr_version;
577 spin_unlock(&fc->lock);
578 }
579 } 585 }
580 586
581 aio_complete(io->iocb, res, 0); 587 io->iocb->ki_complete(io->iocb, res, 0);
582 kfree(io); 588 kfree(io);
583 } 589 }
584} 590}
@@ -2801,6 +2807,7 @@ static ssize_t
2801fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 2807fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2802 loff_t offset) 2808 loff_t offset)
2803{ 2809{
2810 DECLARE_COMPLETION_ONSTACK(wait);
2804 ssize_t ret = 0; 2811 ssize_t ret = 0;
2805 struct file *file = iocb->ki_filp; 2812 struct file *file = iocb->ki_filp;
2806 struct fuse_file *ff = file->private_data; 2813 struct fuse_file *ff = file->private_data;
@@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2852 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) 2859 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
2853 io->async = false; 2860 io->async = false;
2854 2861
2862 if (io->async && is_sync_kiocb(iocb))
2863 io->done = &wait;
2864
2855 if (rw == WRITE) 2865 if (rw == WRITE)
2856 ret = __fuse_direct_write(io, iter, &pos); 2866 ret = __fuse_direct_write(io, iter, &pos);
2857 else 2867 else
@@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2864 if (!is_sync_kiocb(iocb)) 2874 if (!is_sync_kiocb(iocb))
2865 return -EIOCBQUEUED; 2875 return -EIOCBQUEUED;
2866 2876
2867 ret = wait_on_sync_kiocb(iocb); 2877 wait_for_completion(&wait);
2868 } else { 2878 ret = fuse_get_res_by_io(io);
2869 kfree(io);
2870 } 2879 }
2871 2880
2881 kfree(io);
2882
2872 if (rw == WRITE) { 2883 if (rw == WRITE) {
2873 if (ret > 0) 2884 if (ret > 0)
2874 fuse_write_update_size(inode, pos); 2885 fuse_write_update_size(inode, pos);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07c1376..7354dc142a50 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@ struct fuse_io_priv {
263 int err; 263 int err;
264 struct kiocb *iocb; 264 struct kiocb *iocb;
265 struct file *file; 265 struct file *file;
266 struct completion *done;
266}; 267};
267 268
268/** 269/**
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); 110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
111 if (error) 111 if (error)
112 goto out; 112 goto out;
113 113 set_cached_acl(inode, type, acl);
114 if (acl)
115 set_cached_acl(inode, type, acl);
116 else
117 forget_cached_acl(inode, type);
118out: 114out:
119 kfree(data); 115 kfree(data);
120 return error; 116 return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94edebe..a6e6990aea39 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/aio.h> 23#include <linux/uio.h>
24#include <trace/events/writeback.h> 24#include <trace/events/writeback.h>
25 25
26#include "gfs2.h" 26#include "gfs2.h"
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
671 671
672 if (alloc_required) { 672 if (alloc_required) {
673 struct gfs2_alloc_parms ap = { .aflags = 0, }; 673 struct gfs2_alloc_parms ap = { .aflags = 0, };
674 error = gfs2_quota_lock_check(ip); 674 requested = data_blocks + ind_blocks;
675 ap.target = requested;
676 error = gfs2_quota_lock_check(ip, &ap);
675 if (error) 677 if (error)
676 goto out_unlock; 678 goto out_unlock;
677 679
678 requested = data_blocks + ind_blocks;
679 ap.target = requested;
680 error = gfs2_inplace_reserve(ip, &ap); 680 error = gfs2_inplace_reserve(ip, &ap);
681 if (error) 681 if (error)
682 goto out_qunlock; 682 goto out_qunlock;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
1224 1224
1225 if (gfs2_is_stuffed(ip) && 1225 if (gfs2_is_stuffed(ip) &&
1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1227 error = gfs2_quota_lock_check(ip); 1227 error = gfs2_quota_lock_check(ip, &ap);
1228 if (error) 1228 if (error)
1229 return error; 1229 return error;
1230 1230
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8e2d7e..8ec43ab5babf 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/dlm.h> 26#include <linux/dlm.h>
27#include <linux/dlm_plock.h> 27#include <linux/dlm_plock.h>
28#include <linux/aio.h>
29#include <linux/delay.h> 28#include <linux/delay.h>
30 29
31#include "gfs2.h" 30#include "gfs2.h"
@@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
429 if (ret) 428 if (ret)
430 goto out_unlock; 429 goto out_unlock;
431 430
432 ret = gfs2_quota_lock_check(ip);
433 if (ret)
434 goto out_unlock;
435 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 431 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
436 ap.target = data_blocks + ind_blocks; 432 ap.target = data_blocks + ind_blocks;
433 ret = gfs2_quota_lock_check(ip, &ap);
434 if (ret)
435 goto out_unlock;
437 ret = gfs2_inplace_reserve(ip, &ap); 436 ret = gfs2_inplace_reserve(ip, &ap);
438 if (ret) 437 if (ret)
439 goto out_quota_unlock; 438 goto out_quota_unlock;
@@ -765,22 +764,30 @@ out:
765 brelse(dibh); 764 brelse(dibh);
766 return error; 765 return error;
767} 766}
768 767/**
769static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, 768 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
770 unsigned int *data_blocks, unsigned int *ind_blocks) 769 * blocks, determine how many bytes can be written.
770 * @ip: The inode in question.
771 * @len: Max cap of bytes. What we return in *len must be <= this.
772 * @data_blocks: Compute and return the number of data blocks needed
773 * @ind_blocks: Compute and return the number of indirect blocks needed
774 * @max_blocks: The total blocks available to work with.
775 *
776 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
777 */
778static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
779 unsigned int *data_blocks, unsigned int *ind_blocks,
780 unsigned int max_blocks)
771{ 781{
782 loff_t max = *len;
772 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 783 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
773 unsigned int max_blocks = ip->i_rgd->rd_free_clone;
774 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); 784 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
775 785
776 for (tmp = max_data; tmp > sdp->sd_diptrs;) { 786 for (tmp = max_data; tmp > sdp->sd_diptrs;) {
777 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); 787 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
778 max_data -= tmp; 788 max_data -= tmp;
779 } 789 }
780 /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, 790
781 so it might end up with fewer data blocks */
782 if (max_data <= *data_blocks)
783 return;
784 *data_blocks = max_data; 791 *data_blocks = max_data;
785 *ind_blocks = max_blocks - max_data; 792 *ind_blocks = max_blocks - max_data;
786 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; 793 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
797 struct gfs2_inode *ip = GFS2_I(inode); 804 struct gfs2_inode *ip = GFS2_I(inode);
798 struct gfs2_alloc_parms ap = { .aflags = 0, }; 805 struct gfs2_alloc_parms ap = { .aflags = 0, };
799 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 806 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
800 loff_t bytes, max_bytes; 807 loff_t bytes, max_bytes, max_blks = UINT_MAX;
801 int error; 808 int error;
802 const loff_t pos = offset; 809 const loff_t pos = offset;
803 const loff_t count = len; 810 const loff_t count = len;
@@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
819 826
820 gfs2_size_hint(file, offset, len); 827 gfs2_size_hint(file, offset, len);
821 828
829 gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
830 ap.min_target = data_blocks + ind_blocks;
831
822 while (len > 0) { 832 while (len > 0) {
823 if (len < bytes) 833 if (len < bytes)
824 bytes = len; 834 bytes = len;
@@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
827 offset += bytes; 837 offset += bytes;
828 continue; 838 continue;
829 } 839 }
830 error = gfs2_quota_lock_check(ip); 840
841 /* We need to determine how many bytes we can actually
842 * fallocate without exceeding quota or going over the
843 * end of the fs. We start off optimistically by assuming
844 * we can write max_bytes */
845 max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
846
847 /* Since max_bytes is most likely a theoretical max, we
848 * calculate a more realistic 'bytes' to serve as a good
849 * starting point for the number of bytes we may be able
850 * to write */
851 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
852 ap.target = data_blocks + ind_blocks;
853
854 error = gfs2_quota_lock_check(ip, &ap);
831 if (error) 855 if (error)
832 return error; 856 return error;
833retry: 857 /* ap.allowed tells us how many blocks quota will allow
834 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 858 * us to write. Check if this reduces max_blks */
859 if (ap.allowed && ap.allowed < max_blks)
860 max_blks = ap.allowed;
835 861
836 ap.target = data_blocks + ind_blocks;
837 error = gfs2_inplace_reserve(ip, &ap); 862 error = gfs2_inplace_reserve(ip, &ap);
838 if (error) { 863 if (error)
839 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
840 bytes >>= 1;
841 bytes &= bsize_mask;
842 if (bytes == 0)
843 bytes = sdp->sd_sb.sb_bsize;
844 goto retry;
845 }
846 goto out_qunlock; 864 goto out_qunlock;
847 } 865
848 max_bytes = bytes; 866 /* check if the selected rgrp limits our max_blks further */
849 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, 867 if (ap.allowed && ap.allowed < max_blks)
850 &max_bytes, &data_blocks, &ind_blocks); 868 max_blks = ap.allowed;
869
870 /* Almost done. Calculate bytes that can be written using
871 * max_blks. We also recompute max_bytes, data_blocks and
872 * ind_blocks */
873 calc_max_reserv(ip, &max_bytes, &data_blocks,
874 &ind_blocks, max_blks);
851 875
852 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 876 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
853 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); 877 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -931,6 +955,22 @@ out_uninit:
931 return ret; 955 return ret;
932} 956}
933 957
958static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
959 struct file *out, loff_t *ppos,
960 size_t len, unsigned int flags)
961{
962 int error;
963 struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
964
965 error = gfs2_rs_alloc(ip);
966 if (error)
967 return (ssize_t)error;
968
969 gfs2_size_hint(out, *ppos, len);
970
971 return iter_file_splice_write(pipe, out, ppos, len, flags);
972}
973
934#ifdef CONFIG_GFS2_FS_LOCKING_DLM 974#ifdef CONFIG_GFS2_FS_LOCKING_DLM
935 975
936/** 976/**
@@ -1077,7 +1117,7 @@ const struct file_operations gfs2_file_fops = {
1077 .lock = gfs2_lock, 1117 .lock = gfs2_lock,
1078 .flock = gfs2_flock, 1118 .flock = gfs2_flock,
1079 .splice_read = generic_file_splice_read, 1119 .splice_read = generic_file_splice_read,
1080 .splice_write = iter_file_splice_write, 1120 .splice_write = gfs2_file_splice_write,
1081 .setlease = simple_nosetlease, 1121 .setlease = simple_nosetlease,
1082 .fallocate = gfs2_fallocate, 1122 .fallocate = gfs2_fallocate,
1083}; 1123};
@@ -1107,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = {
1107 .release = gfs2_release, 1147 .release = gfs2_release,
1108 .fsync = gfs2_fsync, 1148 .fsync = gfs2_fsync,
1109 .splice_read = generic_file_splice_read, 1149 .splice_read = generic_file_splice_read,
1110 .splice_write = iter_file_splice_write, 1150 .splice_write = gfs2_file_splice_write,
1111 .setlease = generic_setlease, 1151 .setlease = generic_setlease,
1112 .fallocate = gfs2_fallocate, 1152 .fallocate = gfs2_fallocate,
1113}; 1153};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
2047 2047
2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2049{ 2049{
2050 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2050 struct dentry *dent;
2051 if (!sdp->debugfs_dir) 2051
2052 return -ENOMEM; 2052 dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2053 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2053 if (IS_ERR_OR_NULL(dent))
2054 S_IFREG | S_IRUGO, 2054 goto fail;
2055 sdp->debugfs_dir, sdp, 2055 sdp->debugfs_dir = dent;
2056 &gfs2_glocks_fops); 2056
2057 if (!sdp->debugfs_dentry_glocks) 2057 dent = debugfs_create_file("glocks",
2058 S_IFREG | S_IRUGO,
2059 sdp->debugfs_dir, sdp,
2060 &gfs2_glocks_fops);
2061 if (IS_ERR_OR_NULL(dent))
2058 goto fail; 2062 goto fail;
2063 sdp->debugfs_dentry_glocks = dent;
2059 2064
2060 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", 2065 dent = debugfs_create_file("glstats",
2061 S_IFREG | S_IRUGO, 2066 S_IFREG | S_IRUGO,
2062 sdp->debugfs_dir, sdp, 2067 sdp->debugfs_dir, sdp,
2063 &gfs2_glstats_fops); 2068 &gfs2_glstats_fops);
2064 if (!sdp->debugfs_dentry_glstats) 2069 if (IS_ERR_OR_NULL(dent))
2065 goto fail; 2070 goto fail;
2071 sdp->debugfs_dentry_glstats = dent;
2066 2072
2067 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", 2073 dent = debugfs_create_file("sbstats",
2068 S_IFREG | S_IRUGO, 2074 S_IFREG | S_IRUGO,
2069 sdp->debugfs_dir, sdp, 2075 sdp->debugfs_dir, sdp,
2070 &gfs2_sbstats_fops); 2076 &gfs2_sbstats_fops);
2071 if (!sdp->debugfs_dentry_sbstats) 2077 if (IS_ERR_OR_NULL(dent))
2072 goto fail; 2078 goto fail;
2079 sdp->debugfs_dentry_sbstats = dent;
2073 2080
2074 return 0; 2081 return 0;
2075fail: 2082fail:
2076 gfs2_delete_debugfs_file(sdp); 2083 gfs2_delete_debugfs_file(sdp);
2077 return -ENOMEM; 2084 return dent ? PTR_ERR(dent) : -ENOMEM;
2078} 2085}
2079 2086
2080void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2087void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2100int gfs2_register_debugfs(void) 2107int gfs2_register_debugfs(void)
2101{ 2108{
2102 gfs2_root = debugfs_create_dir("gfs2", NULL); 2109 gfs2_root = debugfs_create_dir("gfs2", NULL);
2110 if (IS_ERR(gfs2_root))
2111 return PTR_ERR(gfs2_root);
2103 return gfs2_root ? 0 : -ENOMEM; 2112 return gfs2_root ? 0 : -ENOMEM;
2104} 2113}
2105 2114
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
301 * to the allocation code. 301 * to the allocation code.
302 */ 302 */
303struct gfs2_alloc_parms { 303struct gfs2_alloc_parms {
304 u32 target; 304 u64 target;
305 u32 min_target;
305 u32 aflags; 306 u32 aflags;
307 u64 allowed;
306}; 308};
307 309
308enum { 310enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; 382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
383 int error; 383 int error;
384 384
385 error = gfs2_quota_lock_check(ip); 385 error = gfs2_quota_lock_check(ip, &ap);
386 if (error) 386 if (error)
387 goto out; 387 goto out;
388 388
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
525 int error; 525 int error;
526 526
527 if (da->nr_blocks) { 527 if (da->nr_blocks) {
528 error = gfs2_quota_lock_check(dip); 528 error = gfs2_quota_lock_check(dip, &ap);
529 if (error) 529 if (error)
530 goto fail_quota_locks; 530 goto fail_quota_locks;
531 531
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
953 953
954 if (da.nr_blocks) { 954 if (da.nr_blocks) {
955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
956 error = gfs2_quota_lock_check(dip); 956 error = gfs2_quota_lock_check(dip, &ap);
957 if (error) 957 if (error)
958 goto out_gunlock; 958 goto out_gunlock;
959 959
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1470 1470
1471 if (da.nr_blocks) { 1471 if (da.nr_blocks) {
1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
1473 error = gfs2_quota_lock_check(ndip); 1473 error = gfs2_quota_lock_check(ndip, &ap);
1474 if (error) 1474 if (error)
1475 goto out_gunlock; 1475 goto out_gunlock;
1476 1476
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1669 kuid_t ouid, nuid; 1669 kuid_t ouid, nuid;
1670 kgid_t ogid, ngid; 1670 kgid_t ogid, ngid;
1671 int error; 1671 int error;
1672 struct gfs2_alloc_parms ap;
1672 1673
1673 ouid = inode->i_uid; 1674 ouid = inode->i_uid;
1674 ogid = inode->i_gid; 1675 ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1696 if (error) 1697 if (error)
1697 goto out; 1698 goto out;
1698 1699
1700 ap.target = gfs2_get_inode_blocks(&ip->i_inode);
1701
1699 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1702 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1700 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1703 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1701 error = gfs2_quota_check(ip, nuid, ngid); 1704 error = gfs2_quota_check(ip, nuid, ngid, &ap);
1702 if (error) 1705 if (error)
1703 goto out_gunlock_q; 1706 goto out_gunlock_q;
1704 } 1707 }
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1713 1716
1714 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1717 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1715 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1718 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1716 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1719 gfs2_quota_change(ip, -ap.target, ouid, ogid);
1717 gfs2_quota_change(ip, -blocks, ouid, ogid); 1720 gfs2_quota_change(ip, ap.target, nuid, ngid);
1718 gfs2_quota_change(ip, blocks, nuid, ngid);
1719 } 1721 }
1720 1722
1721out_end_trans: 1723out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..5c27e48aa76f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
923 if (error) 923 if (error)
924 return error; 924 return error;
925 925
926 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
927 force_refresh = FORCE;
928
926 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 929 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
927 930
928 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 931 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
974 sizeof(struct gfs2_quota_data *), sort_qd, NULL); 977 sizeof(struct gfs2_quota_data *), sort_qd, NULL);
975 978
976 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 979 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
977 int force = NO_FORCE;
978 qd = ip->i_res->rs_qa_qd[x]; 980 qd = ip->i_res->rs_qa_qd[x];
979 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 981 error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
980 force = FORCE;
981 error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
982 if (error) 982 if (error)
983 break; 983 break;
984 } 984 }
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1094 return 0; 1094 return 0;
1095} 1095}
1096 1096
1097int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) 1097/**
1098 * gfs2_quota_check - check if allocating new blocks will exceed quota
1099 * @ip: The inode for which this check is being performed
1100 * @uid: The uid to check against
1101 * @gid: The gid to check against
1102 * @ap: The allocation parameters. ap->target contains the requested
1103 * blocks. ap->min_target, if set, contains the minimum blks
1104 * requested.
1105 *
1106 * Returns: 0 on success.
1107 * min_req = ap->min_target ? ap->min_target : ap->target;
1108 * quota must allow atleast min_req blks for success and
1109 * ap->allowed is set to the number of blocks allowed
1110 *
1111 * -EDQUOT otherwise, quota violation. ap->allowed is set to number
1112 * of blocks available.
1113 */
1114int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
1115 struct gfs2_alloc_parms *ap)
1098{ 1116{
1099 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1117 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1100 struct gfs2_quota_data *qd; 1118 struct gfs2_quota_data *qd;
1101 s64 value; 1119 s64 value, warn, limit;
1102 unsigned int x; 1120 unsigned int x;
1103 int error = 0; 1121 int error = 0;
1104 1122
1123 ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
1105 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) 1124 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
1106 return 0; 1125 return 0;
1107 1126
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1115 qid_eq(qd->qd_id, make_kqid_gid(gid)))) 1134 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1116 continue; 1135 continue;
1117 1136
1137 warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
1138 limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
1118 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1139 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
1119 spin_lock(&qd_lock); 1140 spin_lock(&qd_lock);
1120 value += qd->qd_change; 1141 value += qd->qd_change;
1121 spin_unlock(&qd_lock); 1142 spin_unlock(&qd_lock);
1122 1143
1123 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1144 if (limit > 0 && (limit - value) < ap->allowed)
1124 print_message(qd, "exceeded"); 1145 ap->allowed = limit - value;
1125 quota_send_warning(qd->qd_id, 1146 /* If we can't meet the target */
1126 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1147 if (limit && limit < (value + (s64)ap->target)) {
1127 1148 /* If no min_target specified or we don't meet
1128 error = -EDQUOT; 1149 * min_target, return -EDQUOT */
1129 break; 1150 if (!ap->min_target || ap->min_target > ap->allowed) {
1130 } else if (be64_to_cpu(qd->qd_qb.qb_warn) && 1151 print_message(qd, "exceeded");
1131 (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && 1152 quota_send_warning(qd->qd_id,
1153 sdp->sd_vfs->s_dev,
1154 QUOTA_NL_BHARDWARN);
1155 error = -EDQUOT;
1156 break;
1157 }
1158 } else if (warn && warn < value &&
1132 time_after_eq(jiffies, qd->qd_last_warn + 1159 time_after_eq(jiffies, qd->qd_last_warn +
1133 gfs2_tune_get(sdp, 1160 gfs2_tune_get(sdp, gt_quota_warn_period)
1134 gt_quota_warn_period) * HZ)) { 1161 * HZ)) {
1135 quota_send_warning(qd->qd_id, 1162 quota_send_warning(qd->qd_id,
1136 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1163 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1137 error = print_message(qd, "warning"); 1164 error = print_message(qd, "warning");
1138 qd->qd_last_warn = jiffies; 1165 qd->qd_last_warn = jiffies;
1139 } 1166 }
1140 } 1167 }
1141
1142 return error; 1168 return error;
1143} 1169}
1144 1170
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
24extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); 24extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
25extern void gfs2_quota_unlock(struct gfs2_inode *ip); 25extern void gfs2_quota_unlock(struct gfs2_inode *ip);
26 26
27extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); 27extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
28 struct gfs2_alloc_parms *ap);
28extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 29extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
29 kuid_t uid, kgid_t gid); 30 kuid_t uid, kgid_t gid);
30 31
@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);
37 38
38extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); 39extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
39 40
40static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) 41static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
42 struct gfs2_alloc_parms *ap)
41{ 43{
42 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 44 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
43 int ret; 45 int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
48 return ret; 50 return ret;
49 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 51 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
50 return 0; 52 return 0;
51 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); 53 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
52 if (ret) 54 if (ret)
53 gfs2_quota_unlock(ip); 55 gfs2_quota_unlock(ip);
54 return ret; 56 return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1946 * @ip: the inode to reserve space for 1946 * @ip: the inode to reserve space for
1947 * @ap: the allocation parameters 1947 * @ap: the allocation parameters
1948 * 1948 *
1949 * Returns: errno 1949 * We try our best to find an rgrp that has at least ap->target blocks
1950 * available. After a couple of passes (loops == 2), the prospects of finding
1951 * such an rgrp diminish. At this stage, we return the first rgrp that has
1952 * atleast ap->min_target blocks available. Either way, we set ap->allowed to
1953 * the number of blocks available in the chosen rgrp.
1954 *
1955 * Returns: 0 on success,
1956 * -ENOMEM if a suitable rgrp can't be found
1957 * errno otherwise
1950 */ 1958 */
1951 1959
1952int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) 1960int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
1953{ 1961{
1954 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1962 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1955 struct gfs2_rgrpd *begin = NULL; 1963 struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
2012 /* Skip unuseable resource groups */ 2020 /* Skip unuseable resource groups */
2013 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | 2021 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2014 GFS2_RDF_ERROR)) || 2022 GFS2_RDF_ERROR)) ||
2015 (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) 2023 (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2016 goto skip_rgrp; 2024 goto skip_rgrp;
2017 2025
2018 if (sdp->sd_args.ar_rgrplvb) 2026 if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
2027 goto check_rgrp; 2035 goto check_rgrp;
2028 2036
2029 /* If rgrp has enough free space, use it */ 2037 /* If rgrp has enough free space, use it */
2030 if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { 2038 if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
2039 (loops == 2 && ap->min_target &&
2040 rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
2031 ip->i_rgd = rs->rs_rbm.rgd; 2041 ip->i_rgd = rs->rs_rbm.rgd;
2042 ap->allowed = ip->i_rgd->rd_free_clone;
2032 return 0; 2043 return 0;
2033 } 2044 }
2034
2035check_rgrp: 2045check_rgrp:
2036 /* Check for unlinked inodes which can be reclaimed */ 2046 /* Check for unlinked inodes which can be reclaimed */
2037 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) 2047 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
41extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 41extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
42 42
43#define GFS2_AF_ORLOV 1 43#define GFS2_AF_ORLOV 1
44extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); 44extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
45 struct gfs2_alloc_parms *ap);
45extern void gfs2_inplace_release(struct gfs2_inode *ip); 46extern void gfs2_inplace_release(struct gfs2_inode *ip);
46 47
47extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 48extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
732 if (error) 732 if (error)
733 return error; 733 return error;
734 734
735 error = gfs2_quota_lock_check(ip); 735 error = gfs2_quota_lock_check(ip, &ap);
736 if (error) 736 if (error)
737 return error; 737 return error;
738 738
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc81782..98d4ea45bb70 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/aio.h> 17#include <linux/uio.h>
18 18
19#include "hfs_fs.h" 19#include "hfs_fs.h"
20#include "btree.h" 20#include "btree.h"
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
131 hfs_bnode_write(node, entry, data_off + key_len, entry_len); 131 hfs_bnode_write(node, entry, data_off + key_len, entry_len);
132 hfs_bnode_dump(node); 132 hfs_bnode_dump(node);
133 133
134 if (new_node) { 134 /*
135 /* update parent key if we inserted a key 135 * update parent key if we inserted a key
136 * at the start of the first node 136 * at the start of the node and it is not the new node
137 */ 137 */
138 if (!rec && new_node != node) 138 if (!rec && new_node != node) {
139 hfs_brec_update_parent(fd); 139 hfs_bnode_read_key(node, fd->search_key, data_off + size);
140 hfs_brec_update_parent(fd);
141 }
140 142
143 if (new_node) {
141 hfs_bnode_put(fd->bnode); 144 hfs_bnode_put(fd->bnode);
142 if (!new_node->parent) { 145 if (!new_node->parent) {
143 hfs_btree_inc_height(tree); 146 hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
168 goto again; 171 goto again;
169 } 172 }
170 173
171 if (!rec)
172 hfs_brec_update_parent(fd);
173
174 return 0; 174 return 0;
175} 175}
176 176
@@ -370,6 +370,8 @@ again:
370 if (IS_ERR(parent)) 370 if (IS_ERR(parent))
371 return PTR_ERR(parent); 371 return PTR_ERR(parent);
372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key); 372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
373 if (fd->record < 0)
374 return -ENOENT;
373 hfs_bnode_dump(parent); 375 hfs_bnode_dump(parent);
374 rec = fd->record; 376 rec = fd->record;
375 377
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f2d046..f541196d4ee9 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/aio.h> 17#include <linux/uio.h>
18 18
19#include "hfsplus_fs.h" 19#include "hfsplus_fs.h"
20#include "hfsplus_raw.h" 20#include "hfsplus_raw.h"
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca8e8dc..db76cec3ce21 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
319 319
320static void truncate_huge_page(struct page *page) 320static void truncate_huge_page(struct page *page)
321{ 321{
322 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 322 ClearPageDirty(page);
323 ClearPageUptodate(page); 323 ClearPageUptodate(page);
324 delete_from_page_cache(page); 324 delete_from_page_cache(page);
325} 325}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817ac51f6..762c7a3cf43d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
195 /* unchecked xdatum is chained with c->xattr_unchecked */ 195 /* unchecked xdatum is chained with c->xattr_unchecked */
196 list_del_init(&xd->xindex); 196 list_del_init(&xd->xindex);
197 197
198 dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", 198 dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
199 xd->xid, xd->version); 199 xd->xid, xd->version);
200 200
201 return 0; 201 return 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1ca3c9b..3197aed10614 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/quotaops.h> 24#include <linux/quotaops.h>
25#include <linux/uio.h>
25#include <linux/writeback.h> 26#include <linux/writeback.h>
26#include <linux/aio.h>
27#include "jfs_incore.h" 27#include "jfs_incore.h"
28#include "jfs_inode.h" 28#include "jfs_inode.h"
29#include "jfs_filsys.h" 29#include "jfs_filsys.h"
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
102 vaf.fmt = fmt; 102 vaf.fmt = fmt;
103 vaf.va = &args; 103 vaf.va = &args;
104 104
105 pr_err("ERROR: (device %s): %pf: %pV\n", 105 pr_err("ERROR: (device %s): %ps: %pV\n",
106 sb->s_id, __builtin_return_address(0), &vaf); 106 sb->s_id, __builtin_return_address(0), &vaf);
107 107
108 va_end(args); 108 va_end(args);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b684e8a132e6..2bacb9988566 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
207 goto out_free; 207 goto out_free;
208 } 208 }
209 209
210 of->event = atomic_read(&of->kn->attr.open->event);
210 ops = kernfs_ops(of->kn); 211 ops = kernfs_ops(of->kn);
211 if (ops->read) 212 if (ops->read)
212 len = ops->read(of, buf, len, *ppos); 213 len = ops->read(of, buf, len, *ppos);
diff --git a/fs/locks.c b/fs/locks.c
index 528fedfda15e..40bc384728c0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1388,9 +1388,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1388int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) 1388int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1389{ 1389{
1390 int error = 0; 1390 int error = 0;
1391 struct file_lock *new_fl;
1392 struct file_lock_context *ctx = inode->i_flctx; 1391 struct file_lock_context *ctx = inode->i_flctx;
1393 struct file_lock *fl; 1392 struct file_lock *new_fl, *fl, *tmp;
1394 unsigned long break_time; 1393 unsigned long break_time;
1395 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1394 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1396 LIST_HEAD(dispose); 1395 LIST_HEAD(dispose);
@@ -1420,7 +1419,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1420 break_time++; /* so that 0 means no break time */ 1419 break_time++; /* so that 0 means no break time */
1421 } 1420 }
1422 1421
1423 list_for_each_entry(fl, &ctx->flc_lease, fl_list) { 1422 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1424 if (!leases_conflict(fl, new_fl)) 1423 if (!leases_conflict(fl, new_fl))
1425 continue; 1424 continue;
1426 if (want_write) { 1425 if (want_write) {
diff --git a/fs/namei.c b/fs/namei.c
index c83145af4bfc..76fb76a0818b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
119 * PATH_MAX includes the nul terminator --RR. 119 * PATH_MAX includes the nul terminator --RR.
120 */ 120 */
121 121
122#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) 122#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
123 123
124struct filename * 124struct filename *
125getname_flags(const char __user *filename, int flags, int *empty) 125getname_flags(const char __user *filename, int flags, int *empty)
126{ 126{
127 struct filename *result, *err; 127 struct filename *result;
128 int len;
129 long max;
130 char *kname; 128 char *kname;
129 int len;
131 130
132 result = audit_reusename(filename); 131 result = audit_reusename(filename);
133 if (result) 132 if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
136 result = __getname(); 135 result = __getname();
137 if (unlikely(!result)) 136 if (unlikely(!result))
138 return ERR_PTR(-ENOMEM); 137 return ERR_PTR(-ENOMEM);
139 result->refcnt = 1;
140 138
141 /* 139 /*
142 * First, try to embed the struct filename inside the names_cache 140 * First, try to embed the struct filename inside the names_cache
143 * allocation 141 * allocation
144 */ 142 */
145 kname = (char *)result + sizeof(*result); 143 kname = (char *)result->iname;
146 result->name = kname; 144 result->name = kname;
147 result->separate = false;
148 max = EMBEDDED_NAME_MAX;
149 145
150recopy: 146 len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
151 len = strncpy_from_user(kname, filename, max);
152 if (unlikely(len < 0)) { 147 if (unlikely(len < 0)) {
153 err = ERR_PTR(len); 148 __putname(result);
154 goto error; 149 return ERR_PTR(len);
155 } 150 }
156 151
157 /* 152 /*
@@ -160,43 +155,49 @@ recopy:
160 * names_cache allocation for the pathname, and re-do the copy from 155 * names_cache allocation for the pathname, and re-do the copy from
161 * userland. 156 * userland.
162 */ 157 */
163 if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { 158 if (unlikely(len == EMBEDDED_NAME_MAX)) {
159 const size_t size = offsetof(struct filename, iname[1]);
164 kname = (char *)result; 160 kname = (char *)result;
165 161
166 result = kzalloc(sizeof(*result), GFP_KERNEL); 162 /*
167 if (!result) { 163 * size is chosen that way we to guarantee that
168 err = ERR_PTR(-ENOMEM); 164 * result->iname[0] is within the same object and that
169 result = (struct filename *)kname; 165 * kname can't be equal to result->iname, no matter what.
170 goto error; 166 */
167 result = kzalloc(size, GFP_KERNEL);
168 if (unlikely(!result)) {
169 __putname(kname);
170 return ERR_PTR(-ENOMEM);
171 } 171 }
172 result->name = kname; 172 result->name = kname;
173 result->separate = true; 173 len = strncpy_from_user(kname, filename, PATH_MAX);
174 result->refcnt = 1; 174 if (unlikely(len < 0)) {
175 max = PATH_MAX; 175 __putname(kname);
176 goto recopy; 176 kfree(result);
177 return ERR_PTR(len);
178 }
179 if (unlikely(len == PATH_MAX)) {
180 __putname(kname);
181 kfree(result);
182 return ERR_PTR(-ENAMETOOLONG);
183 }
177 } 184 }
178 185
186 result->refcnt = 1;
179 /* The empty path is special. */ 187 /* The empty path is special. */
180 if (unlikely(!len)) { 188 if (unlikely(!len)) {
181 if (empty) 189 if (empty)
182 *empty = 1; 190 *empty = 1;
183 err = ERR_PTR(-ENOENT); 191 if (!(flags & LOOKUP_EMPTY)) {
184 if (!(flags & LOOKUP_EMPTY)) 192 putname(result);
185 goto error; 193 return ERR_PTR(-ENOENT);
194 }
186 } 195 }
187 196
188 err = ERR_PTR(-ENAMETOOLONG);
189 if (unlikely(len >= PATH_MAX))
190 goto error;
191
192 result->uptr = filename; 197 result->uptr = filename;
193 result->aname = NULL; 198 result->aname = NULL;
194 audit_getname(result); 199 audit_getname(result);
195 return result; 200 return result;
196
197error:
198 putname(result);
199 return err;
200} 201}
201 202
202struct filename * 203struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
216 return ERR_PTR(-ENOMEM); 217 return ERR_PTR(-ENOMEM);
217 218
218 if (len <= EMBEDDED_NAME_MAX) { 219 if (len <= EMBEDDED_NAME_MAX) {
219 result->name = (char *)(result) + sizeof(*result); 220 result->name = (char *)result->iname;
220 result->separate = false;
221 } else if (len <= PATH_MAX) { 221 } else if (len <= PATH_MAX) {
222 struct filename *tmp; 222 struct filename *tmp;
223 223
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
227 return ERR_PTR(-ENOMEM); 227 return ERR_PTR(-ENOMEM);
228 } 228 }
229 tmp->name = (char *)result; 229 tmp->name = (char *)result;
230 tmp->separate = true;
231 result = tmp; 230 result = tmp;
232 } else { 231 } else {
233 __putname(result); 232 __putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
249 if (--name->refcnt > 0) 248 if (--name->refcnt > 0)
250 return; 249 return;
251 250
252 if (name->separate) { 251 if (name->name != name->iname) {
253 __putname(name->name); 252 __putname(name->name);
254 kfree(name); 253 kfree(name);
255 } else 254 } else
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1851 return err; 1850 return err;
1852} 1851}
1853 1852
1854static int path_init(int dfd, const char *name, unsigned int flags, 1853static int path_init(int dfd, const struct filename *name, unsigned int flags,
1855 struct nameidata *nd) 1854 struct nameidata *nd)
1856{ 1855{
1857 int retval = 0; 1856 int retval = 0;
1857 const char *s = name->name;
1858 1858
1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; 1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1863 if (flags & LOOKUP_ROOT) { 1863 if (flags & LOOKUP_ROOT) {
1864 struct dentry *root = nd->root.dentry; 1864 struct dentry *root = nd->root.dentry;
1865 struct inode *inode = root->d_inode; 1865 struct inode *inode = root->d_inode;
1866 if (*name) { 1866 if (*s) {
1867 if (!d_can_lookup(root)) 1867 if (!d_can_lookup(root))
1868 return -ENOTDIR; 1868 return -ENOTDIR;
1869 retval = inode_permission(inode, MAY_EXEC); 1869 retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1885 nd->root.mnt = NULL; 1885 nd->root.mnt = NULL;
1886 1886
1887 nd->m_seq = read_seqbegin(&mount_lock); 1887 nd->m_seq = read_seqbegin(&mount_lock);
1888 if (*name=='/') { 1888 if (*s == '/') {
1889 if (flags & LOOKUP_RCU) { 1889 if (flags & LOOKUP_RCU) {
1890 rcu_read_lock(); 1890 rcu_read_lock();
1891 nd->seq = set_root_rcu(nd); 1891 nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1919 1919
1920 dentry = f.file->f_path.dentry; 1920 dentry = f.file->f_path.dentry;
1921 1921
1922 if (*name) { 1922 if (*s) {
1923 if (!d_can_lookup(dentry)) { 1923 if (!d_can_lookup(dentry)) {
1924 fdput(f); 1924 fdput(f);
1925 return -ENOTDIR; 1925 return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1949 return -ECHILD; 1949 return -ECHILD;
1950done: 1950done:
1951 current->total_link_count = 0; 1951 current->total_link_count = 0;
1952 return link_path_walk(name, nd); 1952 return link_path_walk(s, nd);
1953} 1953}
1954 1954
1955static void path_cleanup(struct nameidata *nd) 1955static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
1972} 1972}
1973 1973
1974/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1974/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1975static int path_lookupat(int dfd, const char *name, 1975static int path_lookupat(int dfd, const struct filename *name,
1976 unsigned int flags, struct nameidata *nd) 1976 unsigned int flags, struct nameidata *nd)
1977{ 1977{
1978 struct path path; 1978 struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
2027static int filename_lookup(int dfd, struct filename *name, 2027static int filename_lookup(int dfd, struct filename *name,
2028 unsigned int flags, struct nameidata *nd) 2028 unsigned int flags, struct nameidata *nd)
2029{ 2029{
2030 int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); 2030 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
2031 if (unlikely(retval == -ECHILD)) 2031 if (unlikely(retval == -ECHILD))
2032 retval = path_lookupat(dfd, name->name, flags, nd); 2032 retval = path_lookupat(dfd, name, flags, nd);
2033 if (unlikely(retval == -ESTALE)) 2033 if (unlikely(retval == -ESTALE))
2034 retval = path_lookupat(dfd, name->name, 2034 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
2035 flags | LOOKUP_REVAL, nd);
2036 2035
2037 if (likely(!retval)) 2036 if (likely(!retval))
2038 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); 2037 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
2039 return retval; 2038 return retval;
2040} 2039}
2041 2040
2042static int do_path_lookup(int dfd, const char *name,
2043 unsigned int flags, struct nameidata *nd)
2044{
2045 struct filename *filename = getname_kernel(name);
2046 int retval = PTR_ERR(filename);
2047
2048 if (!IS_ERR(filename)) {
2049 retval = filename_lookup(dfd, filename, flags, nd);
2050 putname(filename);
2051 }
2052 return retval;
2053}
2054
2055/* does lookup, returns the object with parent locked */ 2041/* does lookup, returns the object with parent locked */
2056struct dentry *kern_path_locked(const char *name, struct path *path) 2042struct dentry *kern_path_locked(const char *name, struct path *path)
2057{ 2043{
@@ -2089,9 +2075,15 @@ out:
2089int kern_path(const char *name, unsigned int flags, struct path *path) 2075int kern_path(const char *name, unsigned int flags, struct path *path)
2090{ 2076{
2091 struct nameidata nd; 2077 struct nameidata nd;
2092 int res = do_path_lookup(AT_FDCWD, name, flags, &nd); 2078 struct filename *filename = getname_kernel(name);
2093 if (!res) 2079 int res = PTR_ERR(filename);
2094 *path = nd.path; 2080
2081 if (!IS_ERR(filename)) {
2082 res = filename_lookup(AT_FDCWD, filename, flags, &nd);
2083 putname(filename);
2084 if (!res)
2085 *path = nd.path;
2086 }
2095 return res; 2087 return res;
2096} 2088}
2097EXPORT_SYMBOL(kern_path); 2089EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2108 const char *name, unsigned int flags, 2100 const char *name, unsigned int flags,
2109 struct path *path) 2101 struct path *path)
2110{ 2102{
2111 struct nameidata nd; 2103 struct filename *filename = getname_kernel(name);
2112 int err; 2104 int err = PTR_ERR(filename);
2113 nd.root.dentry = dentry; 2105
2114 nd.root.mnt = mnt;
2115 BUG_ON(flags & LOOKUP_PARENT); 2106 BUG_ON(flags & LOOKUP_PARENT);
2116 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 2107
2117 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); 2108 /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
2118 if (!err) 2109 if (!IS_ERR(filename)) {
2119 *path = nd.path; 2110 struct nameidata nd;
2111 nd.root.dentry = dentry;
2112 nd.root.mnt = mnt;
2113 err = filename_lookup(AT_FDCWD, filename,
2114 flags | LOOKUP_ROOT, &nd);
2115 if (!err)
2116 *path = nd.path;
2117 putname(filename);
2118 }
2120 return err; 2119 return err;
2121} 2120}
2122EXPORT_SYMBOL(vfs_path_lookup); 2121EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
2138 * @len: maximum length @len should be interpreted to 2137 * @len: maximum length @len should be interpreted to
2139 * 2138 *
2140 * Note that this routine is purely a helper for filesystem usage and should 2139 * Note that this routine is purely a helper for filesystem usage and should
2141 * not be called by generic code. Also note that by using this function the 2140 * not be called by generic code.
2142 * nameidata argument is passed to the filesystem methods and a filesystem
2143 * using this helper needs to be prepared for that.
2144 */ 2141 */
2145struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 2142struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2146{ 2143{
@@ -2341,7 +2338,8 @@ out:
2341 * Returns 0 and "path" will be valid on success; Returns error otherwise. 2338 * Returns 0 and "path" will be valid on success; Returns error otherwise.
2342 */ 2339 */
2343static int 2340static int
2344path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) 2341path_mountpoint(int dfd, const struct filename *name, struct path *path,
2342 unsigned int flags)
2345{ 2343{
2346 struct nameidata nd; 2344 struct nameidata nd;
2347 int err; 2345 int err;
@@ -2370,20 +2368,20 @@ out:
2370} 2368}
2371 2369
2372static int 2370static int
2373filename_mountpoint(int dfd, struct filename *s, struct path *path, 2371filename_mountpoint(int dfd, struct filename *name, struct path *path,
2374 unsigned int flags) 2372 unsigned int flags)
2375{ 2373{
2376 int error; 2374 int error;
2377 if (IS_ERR(s)) 2375 if (IS_ERR(name))
2378 return PTR_ERR(s); 2376 return PTR_ERR(name);
2379 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); 2377 error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
2380 if (unlikely(error == -ECHILD)) 2378 if (unlikely(error == -ECHILD))
2381 error = path_mountpoint(dfd, s->name, path, flags); 2379 error = path_mountpoint(dfd, name, path, flags);
2382 if (unlikely(error == -ESTALE)) 2380 if (unlikely(error == -ESTALE))
2383 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); 2381 error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
2384 if (likely(!error)) 2382 if (likely(!error))
2385 audit_inode(s, path->dentry, 0); 2383 audit_inode(name, path->dentry, 0);
2386 putname(s); 2384 putname(name);
2387 return error; 2385 return error;
2388} 2386}
2389 2387
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
3156 static const struct qstr name = QSTR_INIT("/", 1); 3154 static const struct qstr name = QSTR_INIT("/", 1);
3157 struct dentry *dentry, *child; 3155 struct dentry *dentry, *child;
3158 struct inode *dir; 3156 struct inode *dir;
3159 int error = path_lookupat(dfd, pathname->name, 3157 int error = path_lookupat(dfd, pathname,
3160 flags | LOOKUP_DIRECTORY, nd); 3158 flags | LOOKUP_DIRECTORY, nd);
3161 if (unlikely(error)) 3159 if (unlikely(error))
3162 return error; 3160 return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3229 goto out; 3227 goto out;
3230 } 3228 }
3231 3229
3232 error = path_init(dfd, pathname->name, flags, nd); 3230 error = path_init(dfd, pathname, flags, nd);
3233 if (unlikely(error)) 3231 if (unlikely(error))
3234 goto out; 3232 goto out;
3235 3233
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8cf732e..c3929fb2ab26 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
265 265
266 return -EINVAL; 266 return -EINVAL;
267#else 267#else
268 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); 268 VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
269 269
270 if (rw == READ) 270 if (rw == READ)
271 return nfs_file_direct_read(iocb, iter, pos); 271 return nfs_file_direct_read(iocb, iter, pos);
@@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
393 long res = (long) dreq->error; 393 long res = (long) dreq->error;
394 if (!res) 394 if (!res)
395 res = (long) dreq->count; 395 res = (long) dreq->count;
396 aio_complete(dreq->iocb, res, 0); 396 dreq->iocb->ki_complete(dreq->iocb, res, 0);
397 } 397 }
398 398
399 complete_all(&dreq->completion); 399 complete_all(&dreq->completion);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e679d24c39d3..37b15582e0de 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
26#include <linux/nfs_mount.h> 26#include <linux/nfs_mount.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/aio.h>
30#include <linux/gfp.h> 29#include <linux/gfp.h>
31#include <linux/swap.h> 30#include <linux/swap.h>
32 31
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 849ed784d6ac..759931088094 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1876 * request from the inode / page_private pointer and 1876 * request from the inode / page_private pointer and
1877 * release it */ 1877 * release it */
1878 nfs_inode_remove_request(req); 1878 nfs_inode_remove_request(req);
1879 /*
1880 * In case nfs_inode_remove_request has marked the
1881 * page as being dirty
1882 */
1883 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1884 nfs_unlock_and_release_request(req); 1879 nfs_unlock_and_release_request(req);
1885 } 1880 }
1886 1881
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdbc78c72542..03d647bf195d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
137 seg->offset = iomap.offset; 137 seg->offset = iomap.offset;
138 seg->length = iomap.length; 138 seg->length = iomap.length;
139 139
140 dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); 140 dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
141 return 0; 141 return 0;
142 142
143out_error: 143out_error:
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 9da89fddab33..9aa2796da90d 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
122 122
123 p = xdr_decode_hyper(p, &bex.foff); 123 p = xdr_decode_hyper(p, &bex.foff);
124 if (bex.foff & (block_size - 1)) { 124 if (bex.foff & (block_size - 1)) {
125 dprintk("%s: unaligned offset %lld\n", 125 dprintk("%s: unaligned offset 0x%llx\n",
126 __func__, bex.foff); 126 __func__, bex.foff);
127 goto fail; 127 goto fail;
128 } 128 }
129 p = xdr_decode_hyper(p, &bex.len); 129 p = xdr_decode_hyper(p, &bex.len);
130 if (bex.len & (block_size - 1)) { 130 if (bex.len & (block_size - 1)) {
131 dprintk("%s: unaligned length %lld\n", 131 dprintk("%s: unaligned length 0x%llx\n",
132 __func__, bex.foff); 132 __func__, bex.foff);
133 goto fail; 133 goto fail;
134 } 134 }
135 p = xdr_decode_hyper(p, &bex.soff); 135 p = xdr_decode_hyper(p, &bex.soff);
136 if (bex.soff & (block_size - 1)) { 136 if (bex.soff & (block_size - 1)) {
137 dprintk("%s: unaligned disk offset %lld\n", 137 dprintk("%s: unaligned disk offset 0x%llx\n",
138 __func__, bex.soff); 138 __func__, bex.soff);
139 goto fail; 139 goto fail;
140 } 140 }
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3c1bfa155571..6904213a4363 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
118{ 118{
119 struct super_block *sb = exp->ex_path.mnt->mnt_sb; 119 struct super_block *sb = exp->ex_path.mnt->mnt_sb;
120 120
121 if (exp->ex_flags & NFSEXP_NOPNFS) 121 if (!(exp->ex_flags & NFSEXP_PNFS))
122 return; 122 return;
123 123
124 if (sb->s_export_op->get_uuid && 124 if (sb->s_export_op->get_uuid &&
@@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
440 list_move_tail(&lp->lo_perstate, reaplist); 440 list_move_tail(&lp->lo_perstate, reaplist);
441 return; 441 return;
442 } 442 }
443 end = seg->offset; 443 lo->offset = layout_end(seg);
444 } else { 444 } else {
445 /* retain the whole layout segment on a split. */ 445 /* retain the whole layout segment on a split. */
446 if (layout_end(seg) < end) { 446 if (layout_end(seg) < end) {
447 dprintk("%s: split not supported\n", __func__); 447 dprintk("%s: split not supported\n", __func__);
448 return; 448 return;
449 } 449 }
450 450 end = seg->offset;
451 lo->offset = layout_end(seg);
452 } 451 }
453 452
454 layout_update_len(lo, end); 453 layout_update_len(lo, end);
@@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
513 512
514 spin_lock(&clp->cl_lock); 513 spin_lock(&clp->cl_lock);
515 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { 514 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
515 if (ls->ls_layout_type != lrp->lr_layout_type)
516 continue;
517
516 if (lrp->lr_return_type == RETURN_FSID && 518 if (lrp->lr_return_type == RETURN_FSID &&
517 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, 519 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
518 &cstate->current_fh.fh_handle)) 520 &cstate->current_fh.fh_handle))
@@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
587 589
588 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); 590 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
589 591
590 nfsd4_cb_layout_fail(ls); 592 trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
591 593
592 printk(KERN_WARNING 594 printk(KERN_WARNING
593 "nfsd: client %s failed to respond to layout recall. " 595 "nfsd: client %s failed to respond to layout recall. "
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d30bea8d0277..92b9d97aff4f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
1237 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); 1237 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
1238 1238
1239 gdp->gd_notify_types &= ops->notify_types; 1239 gdp->gd_notify_types &= ops->notify_types;
1240 exp_put(exp);
1241out: 1240out:
1241 exp_put(exp);
1242 return nfserr; 1242 return nfserr;
1243} 1243}
1244 1244
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d2f2c37dc2db..8ba1d888f1e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
3221 } else 3221 } else
3222 nfs4_free_openowner(&oo->oo_owner); 3222 nfs4_free_openowner(&oo->oo_owner);
3223 spin_unlock(&clp->cl_lock); 3223 spin_unlock(&clp->cl_lock);
3224 return oo; 3224 return ret;
3225} 3225}
3226 3226
3227static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 3227static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
@@ -5062,7 +5062,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
5062 } else 5062 } else
5063 nfs4_free_lockowner(&lo->lo_owner); 5063 nfs4_free_lockowner(&lo->lo_owner);
5064 spin_unlock(&clp->cl_lock); 5064 spin_unlock(&clp->cl_lock);
5065 return lo; 5065 return ret;
5066} 5066}
5067 5067
5068static void 5068static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index df5e66caf100..5fb7e78169a6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset); 1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length); 1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length);
1564 p = xdr_decode_hyper(p, &lgp->lg_minlength); 1564 p = xdr_decode_hyper(p, &lgp->lg_minlength);
1565 nfsd4_decode_stateid(argp, &lgp->lg_sid); 1565
1566 status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
1567 if (status)
1568 return status;
1569
1566 READ_BUF(4); 1570 READ_BUF(4);
1567 lgp->lg_maxcount = be32_to_cpup(p++); 1571 lgp->lg_maxcount = be32_to_cpup(p++);
1568 1572
@@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
1580 p = xdr_decode_hyper(p, &lcp->lc_seg.offset); 1584 p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
1581 p = xdr_decode_hyper(p, &lcp->lc_seg.length); 1585 p = xdr_decode_hyper(p, &lcp->lc_seg.length);
1582 lcp->lc_reclaim = be32_to_cpup(p++); 1586 lcp->lc_reclaim = be32_to_cpup(p++);
1583 nfsd4_decode_stateid(argp, &lcp->lc_sid); 1587
1588 status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
1589 if (status)
1590 return status;
1591
1584 READ_BUF(4); 1592 READ_BUF(4);
1585 lcp->lc_newoffset = be32_to_cpup(p++); 1593 lcp->lc_newoffset = be32_to_cpup(p++);
1586 if (lcp->lc_newoffset) { 1594 if (lcp->lc_newoffset) {
@@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
1628 READ_BUF(16); 1636 READ_BUF(16);
1629 p = xdr_decode_hyper(p, &lrp->lr_seg.offset); 1637 p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
1630 p = xdr_decode_hyper(p, &lrp->lr_seg.length); 1638 p = xdr_decode_hyper(p, &lrp->lr_seg.length);
1631 nfsd4_decode_stateid(argp, &lrp->lr_sid); 1639
1640 status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
1641 if (status)
1642 return status;
1643
1632 READ_BUF(4); 1644 READ_BUF(4);
1633 lrp->lrf_body_len = be32_to_cpup(p++); 1645 lrp->lrf_body_len = be32_to_cpup(p++);
1634 if (lrp->lrf_body_len > 0) { 1646 if (lrp->lrf_body_len > 0) {
@@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4123 return nfserr_resource; 4135 return nfserr_resource;
4124 *p++ = cpu_to_be32(lrp->lrs_present); 4136 *p++ = cpu_to_be32(lrp->lrs_present);
4125 if (lrp->lrs_present) 4137 if (lrp->lrs_present)
4126 nfsd4_encode_stateid(xdr, &lrp->lr_sid); 4138 return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
4127 return nfs_ok; 4139 return nfs_ok;
4128} 4140}
4129#endif /* CONFIG_NFSD_PNFS */ 4141#endif /* CONFIG_NFSD_PNFS */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 83a9694ec485..46ec934f5dee 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void)
165{ 165{
166 unsigned int hashsize; 166 unsigned int hashsize;
167 unsigned int i; 167 unsigned int i;
168 int status = 0;
168 169
169 max_drc_entries = nfsd_cache_size_limit(); 170 max_drc_entries = nfsd_cache_size_limit();
170 atomic_set(&num_drc_entries, 0); 171 atomic_set(&num_drc_entries, 0);
171 hashsize = nfsd_hashsize(max_drc_entries); 172 hashsize = nfsd_hashsize(max_drc_entries);
172 maskbits = ilog2(hashsize); 173 maskbits = ilog2(hashsize);
173 174
174 register_shrinker(&nfsd_reply_cache_shrinker); 175 status = register_shrinker(&nfsd_reply_cache_shrinker);
176 if (status)
177 return status;
178
175 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 179 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
176 0, 0, NULL); 180 0, 0, NULL);
177 if (!drc_slab) 181 if (!drc_slab)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b5969538f39..ab4987bc637f 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/aio.h> 29#include <linux/uio.h>
30#include "nilfs.h" 30#include "nilfs.h"
31#include "btnode.h" 31#include "btnode.h"
32#include "segment.h" 32#include "segment.h"
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c4..2ff263e6d363 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
8 8
9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ccflags-y := -DNTFS_VERSION=\"2.1.31\" 11ccflags-y := -DNTFS_VERSION=\"2.1.32\"
12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d184dc..c1da78dad1af 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
28#include <linux/swap.h> 28#include <linux/swap.h>
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/writeback.h> 30#include <linux/writeback.h>
31#include <linux/aio.h>
32 31
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -329,62 +328,168 @@ err_out:
329 return err; 328 return err;
330} 329}
331 330
332/** 331static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
333 * ntfs_fault_in_pages_readable - 332 size_t *count)
334 *
335 * Fault a number of userspace pages into pagetables.
336 *
337 * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
338 * with more than two userspace pages as well as handling the single page case
339 * elegantly.
340 *
341 * If you find this difficult to understand, then think of the while loop being
342 * the following code, except that we do without the integer variable ret:
343 *
344 * do {
345 * ret = __get_user(c, uaddr);
346 * uaddr += PAGE_SIZE;
347 * } while (!ret && uaddr < end);
348 *
349 * Note, the final __get_user() may well run out-of-bounds of the user buffer,
350 * but _not_ out-of-bounds of the page the user buffer belongs to, and since
351 * this is only a read and not a write, and since it is still in the same page,
352 * it should not matter and this makes the code much simpler.
353 */
354static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
355 int bytes)
356{ 333{
357 const char __user *end; 334 loff_t pos;
358 volatile char c; 335 s64 end, ll;
359 336 ssize_t err;
360 /* Set @end to the first byte outside the last page we care about. */ 337 unsigned long flags;
361 end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); 338 struct inode *vi = file_inode(file);
362 339 ntfs_inode *base_ni, *ni = NTFS_I(vi);
363 while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) 340 ntfs_volume *vol = ni->vol;
364 ;
365}
366
367/**
368 * ntfs_fault_in_pages_readable_iovec -
369 *
370 * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
371 */
372static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
373 size_t iov_ofs, int bytes)
374{
375 do {
376 const char __user *buf;
377 unsigned len;
378 341
379 buf = iov->iov_base + iov_ofs; 342 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
380 len = iov->iov_len - iov_ofs; 343 "0x%llx, count 0x%lx.", vi->i_ino,
381 if (len > bytes) 344 (unsigned)le32_to_cpu(ni->type),
382 len = bytes; 345 (unsigned long long)*ppos, (unsigned long)*count);
383 ntfs_fault_in_pages_readable(buf, len); 346 /* We can write back this queue in page reclaim. */
384 bytes -= len; 347 current->backing_dev_info = inode_to_bdi(vi);
385 iov++; 348 err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
386 iov_ofs = 0; 349 if (unlikely(err))
387 } while (bytes); 350 goto out;
351 /*
352 * All checks have passed. Before we start doing any writing we want
353 * to abort any totally illegal writes.
354 */
355 BUG_ON(NInoMstProtected(ni));
356 BUG_ON(ni->type != AT_DATA);
357 /* If file is encrypted, deny access, just like NT4. */
358 if (NInoEncrypted(ni)) {
359 /* Only $DATA attributes can be encrypted. */
360 /*
361 * Reminder for later: Encrypted files are _always_
362 * non-resident so that the content can always be encrypted.
363 */
364 ntfs_debug("Denying write access to encrypted file.");
365 err = -EACCES;
366 goto out;
367 }
368 if (NInoCompressed(ni)) {
369 /* Only unnamed $DATA attribute can be compressed. */
370 BUG_ON(ni->name_len);
371 /*
372 * Reminder for later: If resident, the data is not actually
373 * compressed. Only on the switch to non-resident does
374 * compression kick in. This is in contrast to encrypted files
375 * (see above).
376 */
377 ntfs_error(vi->i_sb, "Writing to compressed files is not "
378 "implemented yet. Sorry.");
379 err = -EOPNOTSUPP;
380 goto out;
381 }
382 if (*count == 0)
383 goto out;
384 base_ni = ni;
385 if (NInoAttr(ni))
386 base_ni = ni->ext.base_ntfs_ino;
387 err = file_remove_suid(file);
388 if (unlikely(err))
389 goto out;
390 /*
391 * Our ->update_time method always succeeds thus file_update_time()
392 * cannot fail either so there is no need to check the return code.
393 */
394 file_update_time(file);
395 pos = *ppos;
396 /* The first byte after the last cluster being written to. */
397 end = (pos + *count + vol->cluster_size_mask) &
398 ~(u64)vol->cluster_size_mask;
399 /*
400 * If the write goes beyond the allocated size, extend the allocation
401 * to cover the whole of the write, rounded up to the nearest cluster.
402 */
403 read_lock_irqsave(&ni->size_lock, flags);
404 ll = ni->allocated_size;
405 read_unlock_irqrestore(&ni->size_lock, flags);
406 if (end > ll) {
407 /*
408 * Extend the allocation without changing the data size.
409 *
410 * Note we ensure the allocation is big enough to at least
411 * write some data but we do not require the allocation to be
412 * complete, i.e. it may be partial.
413 */
414 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
415 if (likely(ll >= 0)) {
416 BUG_ON(pos >= ll);
417 /* If the extension was partial truncate the write. */
418 if (end > ll) {
419 ntfs_debug("Truncating write to inode 0x%lx, "
420 "attribute type 0x%x, because "
421 "the allocation was only "
422 "partially extended.",
423 vi->i_ino, (unsigned)
424 le32_to_cpu(ni->type));
425 *count = ll - pos;
426 }
427 } else {
428 err = ll;
429 read_lock_irqsave(&ni->size_lock, flags);
430 ll = ni->allocated_size;
431 read_unlock_irqrestore(&ni->size_lock, flags);
432 /* Perform a partial write if possible or fail. */
433 if (pos < ll) {
434 ntfs_debug("Truncating write to inode 0x%lx "
435 "attribute type 0x%x, because "
436 "extending the allocation "
437 "failed (error %d).",
438 vi->i_ino, (unsigned)
439 le32_to_cpu(ni->type),
440 (int)-err);
441 *count = ll - pos;
442 } else {
443 if (err != -ENOSPC)
444 ntfs_error(vi->i_sb, "Cannot perform "
445 "write to inode "
446 "0x%lx, attribute "
447 "type 0x%x, because "
448 "extending the "
449 "allocation failed "
450 "(error %ld).",
451 vi->i_ino, (unsigned)
452 le32_to_cpu(ni->type),
453 (long)-err);
454 else
455 ntfs_debug("Cannot perform write to "
456 "inode 0x%lx, "
457 "attribute type 0x%x, "
458 "because there is not "
459 "space left.",
460 vi->i_ino, (unsigned)
461 le32_to_cpu(ni->type));
462 goto out;
463 }
464 }
465 }
466 /*
467 * If the write starts beyond the initialized size, extend it up to the
468 * beginning of the write and initialize all non-sparse space between
469 * the old initialized size and the new one. This automatically also
470 * increments the vfs inode->i_size to keep it above or equal to the
471 * initialized_size.
472 */
473 read_lock_irqsave(&ni->size_lock, flags);
474 ll = ni->initialized_size;
475 read_unlock_irqrestore(&ni->size_lock, flags);
476 if (pos > ll) {
477 /*
478 * Wait for ongoing direct i/o to complete before proceeding.
479 * New direct i/o cannot start as we hold i_mutex.
480 */
481 inode_dio_wait(vi);
482 err = ntfs_attr_extend_initialized(ni, pos);
483 if (unlikely(err < 0))
484 ntfs_error(vi->i_sb, "Cannot perform write to inode "
485 "0x%lx, attribute type 0x%x, because "
486 "extending the initialized size "
487 "failed (error %d).", vi->i_ino,
488 (unsigned)le32_to_cpu(ni->type),
489 (int)-err);
490 }
491out:
492 return err;
388} 493}
389 494
390/** 495/**
@@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
421 goto err_out; 526 goto err_out;
422 } 527 }
423 } 528 }
424 err = add_to_page_cache_lru(*cached_page, mapping, index, 529 err = add_to_page_cache_lru(*cached_page, mapping,
425 GFP_KERNEL); 530 index, GFP_KERNEL);
426 if (unlikely(err)) { 531 if (unlikely(err)) {
427 if (err == -EEXIST) 532 if (err == -EEXIST)
428 continue; 533 continue;
@@ -1268,180 +1373,6 @@ rl_not_mapped_enoent:
1268 return err; 1373 return err;
1269} 1374}
1270 1375
1271/*
1272 * Copy as much as we can into the pages and return the number of bytes which
1273 * were successfully copied. If a fault is encountered then clear the pages
1274 * out to (ofs + bytes) and return the number of bytes which were copied.
1275 */
1276static inline size_t ntfs_copy_from_user(struct page **pages,
1277 unsigned nr_pages, unsigned ofs, const char __user *buf,
1278 size_t bytes)
1279{
1280 struct page **last_page = pages + nr_pages;
1281 char *addr;
1282 size_t total = 0;
1283 unsigned len;
1284 int left;
1285
1286 do {
1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes)
1289 len = bytes;
1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr);
1293 if (unlikely(left)) {
1294 /* Do it the slow way. */
1295 addr = kmap(*pages);
1296 left = __copy_from_user(addr + ofs, buf, len);
1297 kunmap(*pages);
1298 if (unlikely(left))
1299 goto err_out;
1300 }
1301 total += len;
1302 bytes -= len;
1303 if (!bytes)
1304 break;
1305 buf += len;
1306 ofs = 0;
1307 } while (++pages < last_page);
1308out:
1309 return total;
1310err_out:
1311 total += len - left;
1312 /* Zero the rest of the target like __copy_from_user(). */
1313 while (++pages < last_page) {
1314 bytes -= len;
1315 if (!bytes)
1316 break;
1317 len = PAGE_CACHE_SIZE;
1318 if (len > bytes)
1319 len = bytes;
1320 zero_user(*pages, 0, len);
1321 }
1322 goto out;
1323}
1324
1325static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
1326 const struct iovec *iov, size_t iov_ofs, size_t bytes)
1327{
1328 size_t total = 0;
1329
1330 while (1) {
1331 const char __user *buf = iov->iov_base + iov_ofs;
1332 unsigned len;
1333 size_t left;
1334
1335 len = iov->iov_len - iov_ofs;
1336 if (len > bytes)
1337 len = bytes;
1338 left = __copy_from_user_inatomic(vaddr, buf, len);
1339 total += len;
1340 bytes -= len;
1341 vaddr += len;
1342 if (unlikely(left)) {
1343 total -= left;
1344 break;
1345 }
1346 if (!bytes)
1347 break;
1348 iov++;
1349 iov_ofs = 0;
1350 }
1351 return total;
1352}
1353
1354static inline void ntfs_set_next_iovec(const struct iovec **iovp,
1355 size_t *iov_ofsp, size_t bytes)
1356{
1357 const struct iovec *iov = *iovp;
1358 size_t iov_ofs = *iov_ofsp;
1359
1360 while (bytes) {
1361 unsigned len;
1362
1363 len = iov->iov_len - iov_ofs;
1364 if (len > bytes)
1365 len = bytes;
1366 bytes -= len;
1367 iov_ofs += len;
1368 if (iov->iov_len == iov_ofs) {
1369 iov++;
1370 iov_ofs = 0;
1371 }
1372 }
1373 *iovp = iov;
1374 *iov_ofsp = iov_ofs;
1375}
1376
1377/*
1378 * This has the same side-effects and return value as ntfs_copy_from_user().
1379 * The difference is that on a fault we need to memset the remainder of the
1380 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
1381 * single-segment behaviour.
1382 *
1383 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
1384 * atomic and when not atomic. This is ok because it calls
1385 * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
1386 * fact, the only difference between __copy_from_user_inatomic() and
1387 * __copy_from_user() is that the latter calls might_sleep() and the former
1388 * should not zero the tail of the buffer on error. And on many architectures
1389 * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
1390 * makes no difference at all on those architectures.
1391 */
1392static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1393 unsigned nr_pages, unsigned ofs, const struct iovec **iov,
1394 size_t *iov_ofs, size_t bytes)
1395{
1396 struct page **last_page = pages + nr_pages;
1397 char *addr;
1398 size_t copied, len, total = 0;
1399
1400 do {
1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes)
1403 len = bytes;
1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */
1410 addr = kmap(*pages);
1411 copied = __ntfs_copy_from_user_iovec_inatomic(addr +
1412 ofs, *iov, *iov_ofs, len);
1413 if (unlikely(copied != len))
1414 goto err_out;
1415 kunmap(*pages);
1416 }
1417 total += len;
1418 ntfs_set_next_iovec(iov, iov_ofs, len);
1419 bytes -= len;
1420 if (!bytes)
1421 break;
1422 ofs = 0;
1423 } while (++pages < last_page);
1424out:
1425 return total;
1426err_out:
1427 BUG_ON(copied > len);
1428 /* Zero the rest of the target like __copy_from_user(). */
1429 memset(addr + ofs + copied, 0, len - copied);
1430 kunmap(*pages);
1431 total += copied;
1432 ntfs_set_next_iovec(iov, iov_ofs, copied);
1433 while (++pages < last_page) {
1434 bytes -= len;
1435 if (!bytes)
1436 break;
1437 len = PAGE_CACHE_SIZE;
1438 if (len > bytes)
1439 len = bytes;
1440 zero_user(*pages, 0, len);
1441 }
1442 goto out;
1443}
1444
1445static inline void ntfs_flush_dcache_pages(struct page **pages, 1376static inline void ntfs_flush_dcache_pages(struct page **pages,
1446 unsigned nr_pages) 1377 unsigned nr_pages)
1447{ 1378{
@@ -1762,86 +1693,83 @@ err_out:
1762 return err; 1693 return err;
1763} 1694}
1764 1695
1765static void ntfs_write_failed(struct address_space *mapping, loff_t to) 1696/*
1697 * Copy as much as we can into the pages and return the number of bytes which
1698 * were successfully copied. If a fault is encountered then clear the pages
1699 * out to (ofs + bytes) and return the number of bytes which were copied.
1700 */
1701static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
1702 unsigned ofs, struct iov_iter *i, size_t bytes)
1766{ 1703{
1767 struct inode *inode = mapping->host; 1704 struct page **last_page = pages + nr_pages;
1705 size_t total = 0;
1706 struct iov_iter data = *i;
1707 unsigned len, copied;
1768 1708
1769 if (to > inode->i_size) { 1709 do {
1770 truncate_pagecache(inode, inode->i_size); 1710 len = PAGE_CACHE_SIZE - ofs;
1771 ntfs_truncate_vfs(inode); 1711 if (len > bytes)
1772 } 1712 len = bytes;
1713 copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
1714 len);
1715 total += copied;
1716 bytes -= copied;
1717 if (!bytes)
1718 break;
1719 iov_iter_advance(&data, copied);
1720 if (copied < len)
1721 goto err;
1722 ofs = 0;
1723 } while (++pages < last_page);
1724out:
1725 return total;
1726err:
1727 /* Zero the rest of the target like __copy_from_user(). */
1728 len = PAGE_CACHE_SIZE - copied;
1729 do {
1730 if (len > bytes)
1731 len = bytes;
1732 zero_user(*pages, copied, len);
1733 bytes -= len;
1734 copied = 0;
1735 len = PAGE_CACHE_SIZE;
1736 } while (++pages < last_page);
1737 goto out;
1773} 1738}
1774 1739
1775/** 1740/**
1776 * ntfs_file_buffered_write - 1741 * ntfs_perform_write - perform buffered write to a file
1777 * 1742 * @file: file to write to
1778 * Locking: The vfs is holding ->i_mutex on the inode. 1743 * @i: iov_iter with data to write
1744 * @pos: byte offset in file at which to begin writing to
1779 */ 1745 */
1780static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, 1746static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
1781 const struct iovec *iov, unsigned long nr_segs, 1747 loff_t pos)
1782 loff_t pos, loff_t *ppos, size_t count)
1783{ 1748{
1784 struct file *file = iocb->ki_filp;
1785 struct address_space *mapping = file->f_mapping; 1749 struct address_space *mapping = file->f_mapping;
1786 struct inode *vi = mapping->host; 1750 struct inode *vi = mapping->host;
1787 ntfs_inode *ni = NTFS_I(vi); 1751 ntfs_inode *ni = NTFS_I(vi);
1788 ntfs_volume *vol = ni->vol; 1752 ntfs_volume *vol = ni->vol;
1789 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1753 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
1790 struct page *cached_page = NULL; 1754 struct page *cached_page = NULL;
1791 char __user *buf = NULL;
1792 s64 end, ll;
1793 VCN last_vcn; 1755 VCN last_vcn;
1794 LCN lcn; 1756 LCN lcn;
1795 unsigned long flags; 1757 size_t bytes;
1796 size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ 1758 ssize_t status, written = 0;
1797 ssize_t status, written;
1798 unsigned nr_pages; 1759 unsigned nr_pages;
1799 int err;
1800 1760
1801 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " 1761 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
1802 "pos 0x%llx, count 0x%lx.", 1762 "0x%llx, count 0x%lx.", vi->i_ino,
1803 vi->i_ino, (unsigned)le32_to_cpu(ni->type), 1763 (unsigned)le32_to_cpu(ni->type),
1804 (unsigned long long)pos, (unsigned long)count); 1764 (unsigned long long)pos,
1805 if (unlikely(!count)) 1765 (unsigned long)iov_iter_count(i));
1806 return 0;
1807 BUG_ON(NInoMstProtected(ni));
1808 /*
1809 * If the attribute is not an index root and it is encrypted or
1810 * compressed, we cannot write to it yet. Note we need to check for
1811 * AT_INDEX_ALLOCATION since this is the type of both directory and
1812 * index inodes.
1813 */
1814 if (ni->type != AT_INDEX_ALLOCATION) {
1815 /* If file is encrypted, deny access, just like NT4. */
1816 if (NInoEncrypted(ni)) {
1817 /*
1818 * Reminder for later: Encrypted files are _always_
1819 * non-resident so that the content can always be
1820 * encrypted.
1821 */
1822 ntfs_debug("Denying write access to encrypted file.");
1823 return -EACCES;
1824 }
1825 if (NInoCompressed(ni)) {
1826 /* Only unnamed $DATA attribute can be compressed. */
1827 BUG_ON(ni->type != AT_DATA);
1828 BUG_ON(ni->name_len);
1829 /*
1830 * Reminder for later: If resident, the data is not
1831 * actually compressed. Only on the switch to non-
1832 * resident does compression kick in. This is in
1833 * contrast to encrypted files (see above).
1834 */
1835 ntfs_error(vi->i_sb, "Writing to compressed files is "
1836 "not implemented yet. Sorry.");
1837 return -EOPNOTSUPP;
1838 }
1839 }
1840 /* 1766 /*
1841 * If a previous ntfs_truncate() failed, repeat it and abort if it 1767 * If a previous ntfs_truncate() failed, repeat it and abort if it
1842 * fails again. 1768 * fails again.
1843 */ 1769 */
1844 if (unlikely(NInoTruncateFailed(ni))) { 1770 if (unlikely(NInoTruncateFailed(ni))) {
1771 int err;
1772
1845 inode_dio_wait(vi); 1773 inode_dio_wait(vi);
1846 err = ntfs_truncate(vi); 1774 err = ntfs_truncate(vi);
1847 if (err || NInoTruncateFailed(ni)) { 1775 if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1855 return err; 1783 return err;
1856 } 1784 }
1857 } 1785 }
1858 /* The first byte after the write. */
1859 end = pos + count;
1860 /*
1861 * If the write goes beyond the allocated size, extend the allocation
1862 * to cover the whole of the write, rounded up to the nearest cluster.
1863 */
1864 read_lock_irqsave(&ni->size_lock, flags);
1865 ll = ni->allocated_size;
1866 read_unlock_irqrestore(&ni->size_lock, flags);
1867 if (end > ll) {
1868 /* Extend the allocation without changing the data size. */
1869 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
1870 if (likely(ll >= 0)) {
1871 BUG_ON(pos >= ll);
1872 /* If the extension was partial truncate the write. */
1873 if (end > ll) {
1874 ntfs_debug("Truncating write to inode 0x%lx, "
1875 "attribute type 0x%x, because "
1876 "the allocation was only "
1877 "partially extended.",
1878 vi->i_ino, (unsigned)
1879 le32_to_cpu(ni->type));
1880 end = ll;
1881 count = ll - pos;
1882 }
1883 } else {
1884 err = ll;
1885 read_lock_irqsave(&ni->size_lock, flags);
1886 ll = ni->allocated_size;
1887 read_unlock_irqrestore(&ni->size_lock, flags);
1888 /* Perform a partial write if possible or fail. */
1889 if (pos < ll) {
1890 ntfs_debug("Truncating write to inode 0x%lx, "
1891 "attribute type 0x%x, because "
1892 "extending the allocation "
1893 "failed (error code %i).",
1894 vi->i_ino, (unsigned)
1895 le32_to_cpu(ni->type), err);
1896 end = ll;
1897 count = ll - pos;
1898 } else {
1899 ntfs_error(vol->sb, "Cannot perform write to "
1900 "inode 0x%lx, attribute type "
1901 "0x%x, because extending the "
1902 "allocation failed (error "
1903 "code %i).", vi->i_ino,
1904 (unsigned)
1905 le32_to_cpu(ni->type), err);
1906 return err;
1907 }
1908 }
1909 }
1910 written = 0;
1911 /*
1912 * If the write starts beyond the initialized size, extend it up to the
1913 * beginning of the write and initialize all non-sparse space between
1914 * the old initialized size and the new one. This automatically also
1915 * increments the vfs inode->i_size to keep it above or equal to the
1916 * initialized_size.
1917 */
1918 read_lock_irqsave(&ni->size_lock, flags);
1919 ll = ni->initialized_size;
1920 read_unlock_irqrestore(&ni->size_lock, flags);
1921 if (pos > ll) {
1922 err = ntfs_attr_extend_initialized(ni, pos);
1923 if (err < 0) {
1924 ntfs_error(vol->sb, "Cannot perform write to inode "
1925 "0x%lx, attribute type 0x%x, because "
1926 "extending the initialized size "
1927 "failed (error code %i).", vi->i_ino,
1928 (unsigned)le32_to_cpu(ni->type), err);
1929 status = err;
1930 goto err_out;
1931 }
1932 }
1933 /* 1786 /*
1934 * Determine the number of pages per cluster for non-resident 1787 * Determine the number of pages per cluster for non-resident
1935 * attributes. 1788 * attributes.
@@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1937 nr_pages = 1; 1790 nr_pages = 1;
1938 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) 1791 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
1939 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; 1792 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
1940 /* Finally, perform the actual write. */
1941 last_vcn = -1; 1793 last_vcn = -1;
1942 if (likely(nr_segs == 1))
1943 buf = iov->iov_base;
1944 do { 1794 do {
1945 VCN vcn; 1795 VCN vcn;
1946 pgoff_t idx, start_idx; 1796 pgoff_t idx, start_idx;
@@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1965 vol->cluster_size_bits, false); 1815 vol->cluster_size_bits, false);
1966 up_read(&ni->runlist.lock); 1816 up_read(&ni->runlist.lock);
1967 if (unlikely(lcn < LCN_HOLE)) { 1817 if (unlikely(lcn < LCN_HOLE)) {
1968 status = -EIO;
1969 if (lcn == LCN_ENOMEM) 1818 if (lcn == LCN_ENOMEM)
1970 status = -ENOMEM; 1819 status = -ENOMEM;
1971 else 1820 else {
1821 status = -EIO;
1972 ntfs_error(vol->sb, "Cannot " 1822 ntfs_error(vol->sb, "Cannot "
1973 "perform write to " 1823 "perform write to "
1974 "inode 0x%lx, " 1824 "inode 0x%lx, "
@@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1977 "is corrupt.", 1827 "is corrupt.",
1978 vi->i_ino, (unsigned) 1828 vi->i_ino, (unsigned)
1979 le32_to_cpu(ni->type)); 1829 le32_to_cpu(ni->type));
1830 }
1980 break; 1831 break;
1981 } 1832 }
1982 if (lcn == LCN_HOLE) { 1833 if (lcn == LCN_HOLE) {
@@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1989 } 1840 }
1990 } 1841 }
1991 } 1842 }
1992 if (bytes > count) 1843 if (bytes > iov_iter_count(i))
1993 bytes = count; 1844 bytes = iov_iter_count(i);
1845again:
1994 /* 1846 /*
1995 * Bring in the user page(s) that we will copy from _first_. 1847 * Bring in the user page(s) that we will copy from _first_.
1996 * Otherwise there is a nasty deadlock on copying from the same 1848 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1999 * pages being swapped out between us bringing them into memory 1851 * pages being swapped out between us bringing them into memory
2000 * and doing the actual copying. 1852 * and doing the actual copying.
2001 */ 1853 */
2002 if (likely(nr_segs == 1)) 1854 if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
2003 ntfs_fault_in_pages_readable(buf, bytes); 1855 status = -EFAULT;
2004 else 1856 break;
2005 ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); 1857 }
2006 /* Get and lock @do_pages starting at index @start_idx. */ 1858 /* Get and lock @do_pages starting at index @start_idx. */
2007 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, 1859 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
2008 pages, &cached_page); 1860 pages, &cached_page);
@@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
2018 status = ntfs_prepare_pages_for_non_resident_write( 1870 status = ntfs_prepare_pages_for_non_resident_write(
2019 pages, do_pages, pos, bytes); 1871 pages, do_pages, pos, bytes);
2020 if (unlikely(status)) { 1872 if (unlikely(status)) {
2021 loff_t i_size;
2022
2023 do { 1873 do {
2024 unlock_page(pages[--do_pages]); 1874 unlock_page(pages[--do_pages]);
2025 page_cache_release(pages[do_pages]); 1875 page_cache_release(pages[do_pages]);
2026 } while (do_pages); 1876 } while (do_pages);
2027 /*
2028 * The write preparation may have instantiated
2029 * allocated space outside i_size. Trim this
2030 * off again. We can ignore any errors in this
2031 * case as we will just be waisting a bit of
2032 * allocated space, which is not a disaster.
2033 */
2034 i_size = i_size_read(vi);
2035 if (pos + bytes > i_size) {
2036 ntfs_write_failed(mapping, pos + bytes);
2037 }
2038 break; 1877 break;
2039 } 1878 }
2040 } 1879 }
2041 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; 1880 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
2042 if (likely(nr_segs == 1)) { 1881 copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
2043 copied = ntfs_copy_from_user(pages + u, do_pages - u, 1882 i, bytes);
2044 ofs, buf, bytes);
2045 buf += copied;
2046 } else
2047 copied = ntfs_copy_from_user_iovec(pages + u,
2048 do_pages - u, ofs, &iov, &iov_ofs,
2049 bytes);
2050 ntfs_flush_dcache_pages(pages + u, do_pages - u); 1883 ntfs_flush_dcache_pages(pages + u, do_pages - u);
2051 status = ntfs_commit_pages_after_write(pages, do_pages, pos, 1884 status = 0;
2052 bytes); 1885 if (likely(copied == bytes)) {
2053 if (likely(!status)) { 1886 status = ntfs_commit_pages_after_write(pages, do_pages,
2054 written += copied; 1887 pos, bytes);
2055 count -= copied; 1888 if (!status)
2056 pos += copied; 1889 status = bytes;
2057 if (unlikely(copied != bytes))
2058 status = -EFAULT;
2059 } 1890 }
2060 do { 1891 do {
2061 unlock_page(pages[--do_pages]); 1892 unlock_page(pages[--do_pages]);
2062 page_cache_release(pages[do_pages]); 1893 page_cache_release(pages[do_pages]);
2063 } while (do_pages); 1894 } while (do_pages);
2064 if (unlikely(status)) 1895 if (unlikely(status < 0))
2065 break; 1896 break;
2066 balance_dirty_pages_ratelimited(mapping); 1897 copied = status;
2067 cond_resched(); 1898 cond_resched();
2068 } while (count); 1899 if (unlikely(!copied)) {
2069err_out: 1900 size_t sc;
2070 *ppos = pos; 1901
1902 /*
1903 * We failed to copy anything. Fall back to single
1904 * segment length write.
1905 *
1906 * This is needed to avoid possible livelock in the
1907 * case that all segments in the iov cannot be copied
1908 * at once without a pagefault.
1909 */
1910 sc = iov_iter_single_seg_count(i);
1911 if (bytes > sc)
1912 bytes = sc;
1913 goto again;
1914 }
1915 iov_iter_advance(i, copied);
1916 pos += copied;
1917 written += copied;
1918 balance_dirty_pages_ratelimited(mapping);
1919 if (fatal_signal_pending(current)) {
1920 status = -EINTR;
1921 break;
1922 }
1923 } while (iov_iter_count(i));
2071 if (cached_page) 1924 if (cached_page)
2072 page_cache_release(cached_page); 1925 page_cache_release(cached_page);
2073 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 1926 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
@@ -2077,59 +1930,56 @@ err_out:
2077} 1930}
2078 1931
2079/** 1932/**
2080 * ntfs_file_aio_write_nolock - 1933 * ntfs_file_write_iter_nolock - write data to a file
1934 * @iocb: IO state structure (file, offset, etc.)
1935 * @from: iov_iter with data to write
1936 *
1937 * Basically the same as __generic_file_write_iter() except that it ends
1938 * up calling ntfs_perform_write() instead of generic_perform_write() and that
1939 * O_DIRECT is not implemented.
2081 */ 1940 */
2082static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, 1941static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
2083 const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) 1942 struct iov_iter *from)
2084{ 1943{
2085 struct file *file = iocb->ki_filp; 1944 struct file *file = iocb->ki_filp;
2086 struct address_space *mapping = file->f_mapping; 1945 loff_t pos = iocb->ki_pos;
2087 struct inode *inode = mapping->host; 1946 ssize_t written = 0;
2088 loff_t pos; 1947 ssize_t err;
2089 size_t count; /* after file limit checks */ 1948 size_t count = iov_iter_count(from);
2090 ssize_t written, err;
2091 1949
2092 count = iov_length(iov, nr_segs); 1950 err = ntfs_prepare_file_for_write(file, &pos, &count);
2093 pos = *ppos; 1951 if (count && !err) {
2094 /* We can write back this queue in page reclaim. */ 1952 iov_iter_truncate(from, count);
2095 current->backing_dev_info = inode_to_bdi(inode); 1953 written = ntfs_perform_write(file, from, pos);
2096 written = 0; 1954 if (likely(written >= 0))
2097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1955 iocb->ki_pos = pos + written;
2098 if (err) 1956 }
2099 goto out;
2100 if (!count)
2101 goto out;
2102 err = file_remove_suid(file);
2103 if (err)
2104 goto out;
2105 err = file_update_time(file);
2106 if (err)
2107 goto out;
2108 written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
2109 count);
2110out:
2111 current->backing_dev_info = NULL; 1957 current->backing_dev_info = NULL;
2112 return written ? written : err; 1958 return written ? written : err;
2113} 1959}
2114 1960
2115/** 1961/**
2116 * ntfs_file_aio_write - 1962 * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
1963 * @iocb: IO state structure
1964 * @from: iov_iter with data to write
1965 *
1966 * Basically the same as generic_file_write_iter() except that it ends up
1967 * calling ntfs_file_write_iter_nolock() instead of
1968 * __generic_file_write_iter().
2117 */ 1969 */
2118static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 1970static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2119 unsigned long nr_segs, loff_t pos)
2120{ 1971{
2121 struct file *file = iocb->ki_filp; 1972 struct file *file = iocb->ki_filp;
2122 struct address_space *mapping = file->f_mapping; 1973 struct inode *vi = file_inode(file);
2123 struct inode *inode = mapping->host;
2124 ssize_t ret; 1974 ssize_t ret;
2125 1975
2126 BUG_ON(iocb->ki_pos != pos); 1976 mutex_lock(&vi->i_mutex);
2127 1977 ret = ntfs_file_write_iter_nolock(iocb, from);
2128 mutex_lock(&inode->i_mutex); 1978 mutex_unlock(&vi->i_mutex);
2129 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2130 mutex_unlock(&inode->i_mutex);
2131 if (ret > 0) { 1979 if (ret > 0) {
2132 int err = generic_write_sync(file, iocb->ki_pos - ret, ret); 1980 ssize_t err;
1981
1982 err = generic_write_sync(file, iocb->ki_pos - ret, ret);
2133 if (err < 0) 1983 if (err < 0)
2134 ret = err; 1984 ret = err;
2135 } 1985 }
@@ -2197,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2197#endif /* NTFS_RW */ 2047#endif /* NTFS_RW */
2198 2048
2199const struct file_operations ntfs_file_ops = { 2049const struct file_operations ntfs_file_ops = {
2200 .llseek = generic_file_llseek, /* Seek inside file. */ 2050 .llseek = generic_file_llseek,
2201 .read = new_sync_read, /* Read from file. */ 2051 .read = new_sync_read,
2202 .read_iter = generic_file_read_iter, /* Async read from file. */ 2052 .read_iter = generic_file_read_iter,
2203#ifdef NTFS_RW 2053#ifdef NTFS_RW
2204 .write = do_sync_write, /* Write to file. */ 2054 .write = new_sync_write,
2205 .aio_write = ntfs_file_aio_write, /* Async write to file. */ 2055 .write_iter = ntfs_file_write_iter,
2206 /*.release = ,*/ /* Last file is closed. See 2056 .fsync = ntfs_file_fsync,
2207 fs/ext2/file.c::
2208 ext2_release_file() for
2209 how to use this to discard
2210 preallocated space for
2211 write opened files. */
2212 .fsync = ntfs_file_fsync, /* Sync a file to disk. */
2213 /*.aio_fsync = ,*/ /* Sync all outstanding async
2214 i/o operations on a
2215 kiocb. */
2216#endif /* NTFS_RW */ 2057#endif /* NTFS_RW */
2217 /*.ioctl = ,*/ /* Perform function on the 2058 .mmap = generic_file_mmap,
2218 mounted filesystem. */ 2059 .open = ntfs_file_open,
2219 .mmap = generic_file_mmap, /* Mmap file. */ 2060 .splice_read = generic_file_splice_read,
2220 .open = ntfs_file_open, /* Open file. */
2221 .splice_read = generic_file_splice_read /* Zero-copy data send with
2222 the data source being on
2223 the ntfs partition. We do
2224 not need to care about the
2225 data destination. */
2226 /*.sendpage = ,*/ /* Zero-copy data send with
2227 the data destination being
2228 on the ntfs partition. We
2229 do not need to care about
2230 the data source. */
2231}; 2061};
2232 2062
2233const struct inode_operations ntfs_file_inode_ops = { 2063const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b9949d363..1d0c21df0d80 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/aio.h>
32 31
33#include "aops.h" 32#include "aops.h"
34#include "attrib.h" 33#include "attrib.h"
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 044158bd22be..2d7f76e52c37 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3370 ret = ocfs2_get_right_path(et, left_path, &right_path); 3370 ret = ocfs2_get_right_path(et, left_path, &right_path);
3371 if (ret) { 3371 if (ret) {
3372 mlog_errno(ret); 3372 mlog_errno(ret);
3373 goto out; 3373 return ret;
3374 } 3374 }
3375 3375
3376 right_el = path_leaf_el(right_path); 3376 right_el = path_leaf_el(right_path);
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3453 subtree_index); 3453 subtree_index);
3454 } 3454 }
3455out: 3455out:
3456 if (right_path) 3456 ocfs2_free_path(right_path);
3457 ocfs2_free_path(right_path);
3458 return ret; 3457 return ret;
3459} 3458}
3460 3459
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3536 ret = ocfs2_get_left_path(et, right_path, &left_path); 3535 ret = ocfs2_get_left_path(et, right_path, &left_path);
3537 if (ret) { 3536 if (ret) {
3538 mlog_errno(ret); 3537 mlog_errno(ret);
3539 goto out; 3538 return ret;
3540 } 3539 }
3541 3540
3542 left_el = path_leaf_el(left_path); 3541 left_el = path_leaf_el(left_path);
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3647 right_path, subtree_index); 3646 right_path, subtree_index);
3648 } 3647 }
3649out: 3648out:
3650 if (left_path) 3649 ocfs2_free_path(left_path);
3651 ocfs2_free_path(left_path);
3652 return ret; 3650 return ret;
3653} 3651}
3654 3652
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4334 } else if (path->p_tree_depth > 0) { 4332 } else if (path->p_tree_depth > 0) {
4335 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); 4333 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
4336 if (status) 4334 if (status)
4337 goto out; 4335 goto exit;
4338 4336
4339 if (left_cpos != 0) { 4337 if (left_cpos != 0) {
4340 left_path = ocfs2_new_path_from_path(path); 4338 left_path = ocfs2_new_path_from_path(path);
4341 if (!left_path) 4339 if (!left_path)
4342 goto out; 4340 goto exit;
4343 4341
4344 status = ocfs2_find_path(et->et_ci, left_path, 4342 status = ocfs2_find_path(et->et_ci, left_path,
4345 left_cpos); 4343 left_cpos);
4346 if (status) 4344 if (status)
4347 goto out; 4345 goto free_left_path;
4348 4346
4349 new_el = path_leaf_el(left_path); 4347 new_el = path_leaf_el(left_path);
4350 4348
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4361 le16_to_cpu(new_el->l_next_free_rec), 4359 le16_to_cpu(new_el->l_next_free_rec),
4362 le16_to_cpu(new_el->l_count)); 4360 le16_to_cpu(new_el->l_count));
4363 status = -EINVAL; 4361 status = -EINVAL;
4364 goto out; 4362 goto free_left_path;
4365 } 4363 }
4366 rec = &new_el->l_recs[ 4364 rec = &new_el->l_recs[
4367 le16_to_cpu(new_el->l_next_free_rec) - 1]; 4365 le16_to_cpu(new_el->l_next_free_rec) - 1];
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4388 path->p_tree_depth > 0) { 4386 path->p_tree_depth > 0) {
4389 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); 4387 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
4390 if (status) 4388 if (status)
4391 goto out; 4389 goto free_left_path;
4392 4390
4393 if (right_cpos == 0) 4391 if (right_cpos == 0)
4394 goto out; 4392 goto free_left_path;
4395 4393
4396 right_path = ocfs2_new_path_from_path(path); 4394 right_path = ocfs2_new_path_from_path(path);
4397 if (!right_path) 4395 if (!right_path)
4398 goto out; 4396 goto free_left_path;
4399 4397
4400 status = ocfs2_find_path(et->et_ci, right_path, right_cpos); 4398 status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
4401 if (status) 4399 if (status)
4402 goto out; 4400 goto free_right_path;
4403 4401
4404 new_el = path_leaf_el(right_path); 4402 new_el = path_leaf_el(right_path);
4405 rec = &new_el->l_recs[0]; 4403 rec = &new_el->l_recs[0];
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4413 (unsigned long long)le64_to_cpu(eb->h_blkno), 4411 (unsigned long long)le64_to_cpu(eb->h_blkno),
4414 le16_to_cpu(new_el->l_next_free_rec)); 4412 le16_to_cpu(new_el->l_next_free_rec));
4415 status = -EINVAL; 4413 status = -EINVAL;
4416 goto out; 4414 goto free_right_path;
4417 } 4415 }
4418 rec = &new_el->l_recs[1]; 4416 rec = &new_el->l_recs[1];
4419 } 4417 }
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4430 ret = contig_type; 4428 ret = contig_type;
4431 } 4429 }
4432 4430
4433out: 4431free_right_path:
4434 if (left_path) 4432 ocfs2_free_path(right_path);
4435 ocfs2_free_path(left_path); 4433free_left_path:
4436 if (right_path) 4434 ocfs2_free_path(left_path);
4437 ocfs2_free_path(right_path); 4435exit:
4438
4439 return ret; 4436 return ret;
4440} 4437}
4441 4438
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6858 if (pages == NULL) { 6855 if (pages == NULL) {
6859 ret = -ENOMEM; 6856 ret = -ENOMEM;
6860 mlog_errno(ret); 6857 mlog_errno(ret);
6861 goto out; 6858 return ret;
6862 } 6859 }
6863 6860
6864 ret = ocfs2_reserve_clusters(osb, 1, &data_ac); 6861 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
6865 if (ret) { 6862 if (ret) {
6866 mlog_errno(ret); 6863 mlog_errno(ret);
6867 goto out; 6864 goto free_pages;
6868 } 6865 }
6869 } 6866 }
6870 6867
@@ -6996,9 +6993,8 @@ out_commit:
6996out: 6993out:
6997 if (data_ac) 6994 if (data_ac)
6998 ocfs2_free_alloc_context(data_ac); 6995 ocfs2_free_alloc_context(data_ac);
6999 if (pages) 6996free_pages:
7000 kfree(pages); 6997 kfree(pages);
7001
7002 return ret; 6998 return ret;
7003} 6999}
7004 7000
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 44db1808cdb5..8d2bc840c288 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/uio.h>
32 33
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
34 35
@@ -663,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb,
663 return 0; 664 return 0;
664} 665}
665 666
667static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
668 struct inode *inode, loff_t offset,
669 u64 zero_len, int cluster_align)
670{
671 u32 p_cpos = 0;
672 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
673 unsigned int num_clusters = 0;
674 unsigned int ext_flags = 0;
675 int ret = 0;
676
677 if (offset <= i_size_read(inode) || cluster_align)
678 return 0;
679
680 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
681 &ext_flags);
682 if (ret < 0) {
683 mlog_errno(ret);
684 return ret;
685 }
686
687 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
688 u64 s = i_size_read(inode);
689 sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
690 (do_div(s, osb->s_clustersize) >> 9);
691
692 ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
693 zero_len >> 9, GFP_NOFS, false);
694 if (ret < 0)
695 mlog_errno(ret);
696 }
697
698 return ret;
699}
700
701static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
702 struct inode *inode, loff_t offset)
703{
704 u64 zero_start, zero_len, total_zero_len;
705 u32 p_cpos = 0, clusters_to_add;
706 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
707 unsigned int num_clusters = 0;
708 unsigned int ext_flags = 0;
709 u32 size_div, offset_div;
710 int ret = 0;
711
712 {
713 u64 o = offset;
714 u64 s = i_size_read(inode);
715
716 offset_div = do_div(o, osb->s_clustersize);
717 size_div = do_div(s, osb->s_clustersize);
718 }
719
720 if (offset <= i_size_read(inode))
721 return 0;
722
723 clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
724 ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
725 total_zero_len = offset - i_size_read(inode);
726 if (clusters_to_add)
727 total_zero_len -= offset_div;
728
729 /* Allocate clusters to fill out holes, and this is only needed
 730 * when we add more than one cluster. Otherwise the cluster will
731 * be allocated during direct IO */
732 if (clusters_to_add > 1) {
733 ret = ocfs2_extend_allocation(inode,
734 OCFS2_I(inode)->ip_clusters,
735 clusters_to_add - 1, 0);
736 if (ret) {
737 mlog_errno(ret);
738 goto out;
739 }
740 }
741
742 while (total_zero_len) {
743 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
744 &ext_flags);
745 if (ret < 0) {
746 mlog_errno(ret);
747 goto out;
748 }
749
750 zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
751 size_div;
752 zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
753 size_div;
754 zero_len = min(total_zero_len, zero_len);
755
756 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
757 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
758 zero_start >> 9, zero_len >> 9,
759 GFP_NOFS, false);
760 if (ret < 0) {
761 mlog_errno(ret);
762 goto out;
763 }
764 }
765
766 total_zero_len -= zero_len;
767 v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
768
 769 /* Only at the first iteration can the cluster be unaligned.
770 * So set size_div to 0 for the rest */
771 size_div = 0;
772 }
773
774out:
775 return ret;
776}
777
666static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, 778static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
667 struct iov_iter *iter, 779 struct iov_iter *iter,
668 loff_t offset) 780 loff_t offset)
@@ -677,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
677 struct buffer_head *di_bh = NULL; 789 struct buffer_head *di_bh = NULL;
678 size_t count = iter->count; 790 size_t count = iter->count;
679 journal_t *journal = osb->journal->j_journal; 791 journal_t *journal = osb->journal->j_journal;
680 u32 zero_len; 792 u64 zero_len_head, zero_len_tail;
681 int cluster_align; 793 int cluster_align_head, cluster_align_tail;
682 loff_t final_size = offset + count; 794 loff_t final_size = offset + count;
683 int append_write = offset >= i_size_read(inode) ? 1 : 0; 795 int append_write = offset >= i_size_read(inode) ? 1 : 0;
684 unsigned int num_clusters = 0; 796 unsigned int num_clusters = 0;
@@ -686,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
686 798
687 { 799 {
688 u64 o = offset; 800 u64 o = offset;
801 u64 s = i_size_read(inode);
802
803 zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
804 cluster_align_head = !zero_len_head;
689 805
690 zero_len = do_div(o, 1 << osb->s_clustersize_bits); 806 zero_len_tail = osb->s_clustersize -
691 cluster_align = !zero_len; 807 do_div(s, osb->s_clustersize);
808 if ((offset - i_size_read(inode)) < zero_len_tail)
809 zero_len_tail = offset - i_size_read(inode);
810 cluster_align_tail = !zero_len_tail;
692 } 811 }
693 812
694 /* 813 /*
@@ -706,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
706 } 825 }
707 826
708 if (append_write) { 827 if (append_write) {
709 ret = ocfs2_inode_lock(inode, &di_bh, 1); 828 ret = ocfs2_inode_lock(inode, NULL, 1);
710 if (ret < 0) { 829 if (ret < 0) {
711 mlog_errno(ret); 830 mlog_errno(ret);
712 goto clean_orphan; 831 goto clean_orphan;
713 } 832 }
714 833
834 /* zeroing out the previously allocated cluster tail
 835 * that was not zeroed */
715 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 836 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
716 ret = ocfs2_zero_extend(inode, di_bh, offset); 837 ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
838 zero_len_tail, cluster_align_tail);
717 else 839 else
718 ret = ocfs2_extend_no_holes(inode, di_bh, offset, 840 ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
719 offset); 841 offset);
720 if (ret < 0) { 842 if (ret < 0) {
721 mlog_errno(ret); 843 mlog_errno(ret);
722 ocfs2_inode_unlock(inode, 1); 844 ocfs2_inode_unlock(inode, 1);
723 brelse(di_bh);
724 goto clean_orphan; 845 goto clean_orphan;
725 } 846 }
726 847
@@ -728,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
728 if (is_overwrite < 0) { 849 if (is_overwrite < 0) {
729 mlog_errno(is_overwrite); 850 mlog_errno(is_overwrite);
730 ocfs2_inode_unlock(inode, 1); 851 ocfs2_inode_unlock(inode, 1);
731 brelse(di_bh);
732 goto clean_orphan; 852 goto clean_orphan;
733 } 853 }
734 854
735 ocfs2_inode_unlock(inode, 1); 855 ocfs2_inode_unlock(inode, 1);
736 brelse(di_bh);
737 di_bh = NULL;
738 } 856 }
739 857
740 written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, 858 written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
@@ -771,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
771 if (ret < 0) 889 if (ret < 0)
772 mlog_errno(ret); 890 mlog_errno(ret);
773 } 891 }
774 } else if (written < 0 && append_write && !is_overwrite && 892 } else if (written > 0 && append_write && !is_overwrite &&
775 !cluster_align) { 893 !cluster_align_head) {
894 /* zeroing out the allocated cluster head */
776 u32 p_cpos = 0; 895 u32 p_cpos = 0;
777 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); 896 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
778 897
898 ret = ocfs2_inode_lock(inode, NULL, 0);
899 if (ret < 0) {
900 mlog_errno(ret);
901 goto clean_orphan;
902 }
903
779 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, 904 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
780 &num_clusters, &ext_flags); 905 &num_clusters, &ext_flags);
781 if (ret < 0) { 906 if (ret < 0) {
782 mlog_errno(ret); 907 mlog_errno(ret);
908 ocfs2_inode_unlock(inode, 0);
783 goto clean_orphan; 909 goto clean_orphan;
784 } 910 }
785 911
@@ -787,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
787 913
788 ret = blkdev_issue_zeroout(osb->sb->s_bdev, 914 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
789 p_cpos << (osb->s_clustersize_bits - 9), 915 p_cpos << (osb->s_clustersize_bits - 9),
790 zero_len >> 9, GFP_KERNEL, false); 916 zero_len_head >> 9, GFP_NOFS, false);
791 if (ret < 0) 917 if (ret < 0)
792 mlog_errno(ret); 918 mlog_errno(ret);
919
920 ocfs2_inode_unlock(inode, 0);
793 } 921 }
794 922
795clean_orphan: 923clean_orphan:
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155d54df..dd59599b022d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25#include <linux/aio.h> 25#include <linux/fs.h>
26 26
27handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 27handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
28 struct page *page, 28 struct page *page,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 16eff45727ee..8e19b9d7aba8 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void)
1312 int ret = -ENOMEM; 1312 int ret = -ENOMEM;
1313 1313
1314 o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); 1314 o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
1315 if (!o2hb_debug_dir) { 1315 if (IS_ERR_OR_NULL(o2hb_debug_dir)) {
1316 ret = o2hb_debug_dir ?
1317 PTR_ERR(o2hb_debug_dir) : -ENOMEM;
1316 mlog_errno(ret); 1318 mlog_errno(ret);
1317 goto bail; 1319 goto bail;
1318 } 1320 }
@@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void)
1325 sizeof(o2hb_live_node_bitmap), 1327 sizeof(o2hb_live_node_bitmap),
1326 O2NM_MAX_NODES, 1328 O2NM_MAX_NODES,
1327 o2hb_live_node_bitmap); 1329 o2hb_live_node_bitmap);
1328 if (!o2hb_debug_livenodes) { 1330 if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) {
1331 ret = o2hb_debug_livenodes ?
1332 PTR_ERR(o2hb_debug_livenodes) : -ENOMEM;
1329 mlog_errno(ret); 1333 mlog_errno(ret);
1330 goto bail; 1334 goto bail;
1331 } 1335 }
@@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void)
1338 sizeof(o2hb_live_region_bitmap), 1342 sizeof(o2hb_live_region_bitmap),
1339 O2NM_MAX_REGIONS, 1343 O2NM_MAX_REGIONS,
1340 o2hb_live_region_bitmap); 1344 o2hb_live_region_bitmap);
1341 if (!o2hb_debug_liveregions) { 1345 if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) {
1346 ret = o2hb_debug_liveregions ?
1347 PTR_ERR(o2hb_debug_liveregions) : -ENOMEM;
1342 mlog_errno(ret); 1348 mlog_errno(ret);
1343 goto bail; 1349 goto bail;
1344 } 1350 }
@@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void)
1352 sizeof(o2hb_quorum_region_bitmap), 1358 sizeof(o2hb_quorum_region_bitmap),
1353 O2NM_MAX_REGIONS, 1359 O2NM_MAX_REGIONS,
1354 o2hb_quorum_region_bitmap); 1360 o2hb_quorum_region_bitmap);
1355 if (!o2hb_debug_quorumregions) { 1361 if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) {
1362 ret = o2hb_debug_quorumregions ?
1363 PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM;
1356 mlog_errno(ret); 1364 mlog_errno(ret);
1357 goto bail; 1365 goto bail;
1358 } 1366 }
@@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void)
1366 sizeof(o2hb_failed_region_bitmap), 1374 sizeof(o2hb_failed_region_bitmap),
1367 O2NM_MAX_REGIONS, 1375 O2NM_MAX_REGIONS,
1368 o2hb_failed_region_bitmap); 1376 o2hb_failed_region_bitmap);
1369 if (!o2hb_debug_failedregions) { 1377 if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) {
1378 ret = o2hb_debug_failedregions ?
1379 PTR_ERR(o2hb_debug_failedregions) : -ENOMEM;
1370 mlog_errno(ret); 1380 mlog_errno(ret);
1371 goto bail; 1381 goto bail;
1372 } 1382 }
@@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
2000 2010
2001 reg->hr_debug_dir = 2011 reg->hr_debug_dir =
2002 debugfs_create_dir(config_item_name(&reg->hr_item), dir); 2012 debugfs_create_dir(config_item_name(&reg->hr_item), dir);
2003 if (!reg->hr_debug_dir) { 2013 if (IS_ERR_OR_NULL(reg->hr_debug_dir)) {
2014 ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM;
2004 mlog_errno(ret); 2015 mlog_errno(ret);
2005 goto bail; 2016 goto bail;
2006 } 2017 }
@@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
2013 O2HB_DB_TYPE_REGION_LIVENODES, 2024 O2HB_DB_TYPE_REGION_LIVENODES,
2014 sizeof(reg->hr_live_node_bitmap), 2025 sizeof(reg->hr_live_node_bitmap),
2015 O2NM_MAX_NODES, reg); 2026 O2NM_MAX_NODES, reg);
2016 if (!reg->hr_debug_livenodes) { 2027 if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) {
2028 ret = reg->hr_debug_livenodes ?
2029 PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM;
2017 mlog_errno(ret); 2030 mlog_errno(ret);
2018 goto bail; 2031 goto bail;
2019 } 2032 }
@@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
2025 sizeof(*(reg->hr_db_regnum)), 2038 sizeof(*(reg->hr_db_regnum)),
2026 O2HB_DB_TYPE_REGION_NUMBER, 2039 O2HB_DB_TYPE_REGION_NUMBER,
2027 0, O2NM_MAX_NODES, reg); 2040 0, O2NM_MAX_NODES, reg);
2028 if (!reg->hr_debug_regnum) { 2041 if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) {
2042 ret = reg->hr_debug_regnum ?
2043 PTR_ERR(reg->hr_debug_regnum) : -ENOMEM;
2029 mlog_errno(ret); 2044 mlog_errno(ret);
2030 goto bail; 2045 goto bail;
2031 } 2046 }
@@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
2037 sizeof(*(reg->hr_db_elapsed_time)), 2052 sizeof(*(reg->hr_db_elapsed_time)),
2038 O2HB_DB_TYPE_REGION_ELAPSED_TIME, 2053 O2HB_DB_TYPE_REGION_ELAPSED_TIME,
2039 0, 0, reg); 2054 0, 0, reg);
2040 if (!reg->hr_debug_elapsed_time) { 2055 if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) {
2056 ret = reg->hr_debug_elapsed_time ?
2057 PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM;
2041 mlog_errno(ret); 2058 mlog_errno(ret);
2042 goto bail; 2059 goto bail;
2043 } 2060 }
@@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
2049 sizeof(*(reg->hr_db_pinned)), 2066 sizeof(*(reg->hr_db_pinned)),
2050 O2HB_DB_TYPE_REGION_PINNED, 2067 O2HB_DB_TYPE_REGION_PINNED,
2051 0, 0, reg); 2068 0, 0, reg);
2052 if (!reg->hr_debug_pinned) { 2069 if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) {
2070 ret = reg->hr_debug_pinned ?
2071 PTR_ERR(reg->hr_debug_pinned) : -ENOMEM;
2053 mlog_errno(ret); 2072 mlog_errno(ret);
2054 goto bail; 2073 goto bail;
2055 } 2074 }
2056 2075
2057 ret = 0; 2076 return 0;
2058bail: 2077bail:
2078 debugfs_remove_recursive(reg->hr_debug_dir);
2059 return ret; 2079 return ret;
2060} 2080}
2061 2081
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9e6508..7fdc25a4d8c0 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
196 } \ 196 } \
197} while (0) 197} while (0)
198 198
199#define mlog_errno(st) do { \ 199#define mlog_errno(st) ({ \
200 int _st = (st); \ 200 int _st = (st); \
201 if (_st != -ERESTARTSYS && _st != -EINTR && \ 201 if (_st != -ERESTARTSYS && _st != -EINTR && \
202 _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ 202 _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
203 _st != -EDQUOT) \ 203 _st != -EDQUOT) \
204 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ 204 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
205} while (0) 205 _st; \
206})
206 207
207#define mlog_bug_on_msg(cond, fmt, args...) do { \ 208#define mlog_bug_on_msg(cond, fmt, args...) do { \
208 if (cond) { \ 209 if (cond) { \
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b08050bd3f2e..ccd4dcfc3645 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -18,7 +18,7 @@
18 * 18 *
19 * linux/fs/minix/dir.c 19 * linux/fs/minix/dir.c
20 * 20 *
21 * Copyright (C) 1991, 1992 Linux Torvalds 21 * Copyright (C) 1991, 1992 Linus Torvalds
22 * 22 *
23 * This program is free software; you can redistribute it and/or 23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public 24 * modify it under the terms of the GNU General Public
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
2047 const char *name, 2047 const char *name,
2048 int namelen) 2048 int namelen)
2049{ 2049{
2050 int ret; 2050 int ret = 0;
2051 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2051 struct ocfs2_dir_lookup_result lookup = { NULL, };
2052 2052
2053 trace_ocfs2_check_dir_for_entry( 2053 trace_ocfs2_check_dir_for_entry(
2054 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); 2054 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
2055 2055
2056 ret = -EEXIST; 2056 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
2057 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) 2057 ret = -EEXIST;
2058 goto bail; 2058 mlog_errno(ret);
2059 }
2059 2060
2060 ret = 0;
2061bail:
2062 ocfs2_free_dir_lookup_result(&lookup); 2061 ocfs2_free_dir_lookup_result(&lookup);
2063 2062
2064 if (ret)
2065 mlog_errno(ret);
2066 return ret; 2063 return ret;
2067} 2064}
2068 2065
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 11849a44dc5a..956edf67be20 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1391 int noqueue_attempted = 0; 1391 int noqueue_attempted = 0;
1392 int dlm_locked = 0; 1392 int dlm_locked = 0;
1393 1393
1394 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
1395 mlog_errno(-EINVAL);
1396 return -EINVAL;
1397 }
1398
1394 ocfs2_init_mask_waiter(&mw); 1399 ocfs2_init_mask_waiter(&mw);
1395 1400
1396 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1401 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
@@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2954 osb->osb_debug_root, 2959 osb->osb_debug_root,
2955 osb, 2960 osb,
2956 &ocfs2_dlm_debug_fops); 2961 &ocfs2_dlm_debug_fops);
2957 if (!dlm_debug->d_locking_state) { 2962 if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) {
2958 ret = -EINVAL; 2963 ret = -EINVAL;
2959 mlog(ML_ERROR, 2964 mlog(ML_ERROR,
2960 "Unable to create locking state debugfs file.\n"); 2965 "Unable to create locking state debugfs file.\n");
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 29651167190d..540dc4bdd042 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
82 } 82 }
83 83
84 status = ocfs2_test_inode_bit(osb, blkno, &set); 84 status = ocfs2_test_inode_bit(osb, blkno, &set);
85 trace_ocfs2_get_dentry_test_bit(status, set);
86 if (status < 0) { 85 if (status < 0) {
87 if (status == -EINVAL) { 86 if (status == -EINVAL) {
88 /* 87 /*
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
96 goto unlock_nfs_sync; 95 goto unlock_nfs_sync;
97 } 96 }
98 97
98 trace_ocfs2_get_dentry_test_bit(status, set);
99 /* If the inode allocator bit is clear, this inode must be stale */ 99 /* If the inode allocator bit is clear, this inode must be stale */
100 if (!set) { 100 if (!set) {
101 status = -ESTALE; 101 status = -ESTALE;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46e0d4e857c7..91f03ce98108 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2280 file->f_path.dentry->d_name.name, 2280 file->f_path.dentry->d_name.name,
2281 (unsigned int)from->nr_segs); /* GRRRRR */ 2281 (unsigned int)from->nr_segs); /* GRRRRR */
2282 2282
2283 if (iocb->ki_nbytes == 0) 2283 if (count == 0)
2284 return 0; 2284 return 0;
2285 2285
2286 appending = file->f_flags & O_APPEND ? 1 : 0; 2286 appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2330,8 +2330,7 @@ relock:
2330 } 2330 }
2331 2331
2332 can_do_direct = direct_io; 2332 can_do_direct = direct_io;
2333 ret = ocfs2_prepare_inode_for_write(file, ppos, 2333 ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
2334 iocb->ki_nbytes, appending,
2335 &can_do_direct, &has_refcount); 2334 &can_do_direct, &has_refcount);
2336 if (ret < 0) { 2335 if (ret < 0) {
2337 mlog_errno(ret); 2336 mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
2339 } 2338 }
2340 2339
2341 if (direct_io && !is_sync_kiocb(iocb)) 2340 if (direct_io && !is_sync_kiocb(iocb))
2342 unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, 2341 unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);
2343 *ppos);
2344 2342
2345 /* 2343 /*
2346 * We can't complete the direct I/O as requested, fall back to 2344 * We can't complete the direct I/O as requested, fall back to
@@ -2394,7 +2392,6 @@ relock:
2394 /* 2392 /*
2395 * for completing the rest of the request. 2393 * for completing the rest of the request.
2396 */ 2394 */
2397 *ppos += written;
2398 count -= written; 2395 count -= written;
2399 written_buffered = generic_perform_write(file, from, *ppos); 2396 written_buffered = generic_perform_write(file, from, *ppos);
2400 /* 2397 /*
@@ -2409,7 +2406,6 @@ relock:
2409 goto out_dio; 2406 goto out_dio;
2410 } 2407 }
2411 2408
2412 iocb->ki_pos = *ppos + written_buffered;
2413 /* We need to ensure that the page cache pages are written to 2409 /* We need to ensure that the page cache pages are written to
2414 * disk and invalidated to preserve the expected O_DIRECT 2410 * disk and invalidated to preserve the expected O_DIRECT
2415 * semantics. 2411 * semantics.
@@ -2418,6 +2414,7 @@ relock:
2418 ret = filemap_write_and_wait_range(file->f_mapping, *ppos, 2414 ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
2419 endbyte); 2415 endbyte);
2420 if (ret == 0) { 2416 if (ret == 0) {
2417 iocb->ki_pos = *ppos + written_buffered;
2421 written += written_buffered; 2418 written += written_buffered;
2422 invalidate_mapping_pages(mapping, 2419 invalidate_mapping_pages(mapping,
2423 *ppos >> PAGE_CACHE_SHIFT, 2420 *ppos >> PAGE_CACHE_SHIFT,
@@ -2440,10 +2437,14 @@ out_dio:
2440 /* buffered aio wouldn't have proper lock coverage today */ 2437 /* buffered aio wouldn't have proper lock coverage today */
2441 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2438 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2442 2439
2440 if (unlikely(written <= 0))
2441 goto no_sync;
2442
2443 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || 2443 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
2444 ((file->f_flags & O_DIRECT) && !direct_io)) { 2444 ((file->f_flags & O_DIRECT) && !direct_io)) {
2445 ret = filemap_fdatawrite_range(file->f_mapping, *ppos, 2445 ret = filemap_fdatawrite_range(file->f_mapping,
2446 *ppos + count - 1); 2446 iocb->ki_pos - written,
2447 iocb->ki_pos - 1);
2447 if (ret < 0) 2448 if (ret < 0)
2448 written = ret; 2449 written = ret;
2449 2450
@@ -2454,10 +2455,12 @@ out_dio:
2454 } 2455 }
2455 2456
2456 if (!ret) 2457 if (!ret)
2457 ret = filemap_fdatawait_range(file->f_mapping, *ppos, 2458 ret = filemap_fdatawait_range(file->f_mapping,
2458 *ppos + count - 1); 2459 iocb->ki_pos - written,
2460 iocb->ki_pos - 1);
2459 } 2461 }
2460 2462
2463no_sync:
2461 /* 2464 /*
2462 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2465 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2463 * function pointer which is called when o_direct io completes so that 2466 * function pointer which is called when o_direct io completes so that
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3025c0da6b8a..be71ca0937f7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode,
624 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, 624 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
625 le16_to_cpu(di->i_suballoc_slot)); 625 le16_to_cpu(di->i_suballoc_slot));
626 if (!inode_alloc_inode) { 626 if (!inode_alloc_inode) {
627 status = -EEXIST; 627 status = -ENOENT;
628 mlog_errno(status); 628 mlog_errno(status);
629 goto bail; 629 goto bail;
630 } 630 }
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
742 ORPHAN_DIR_SYSTEM_INODE, 742 ORPHAN_DIR_SYSTEM_INODE,
743 orphaned_slot); 743 orphaned_slot);
744 if (!orphan_dir_inode) { 744 if (!orphan_dir_inode) {
745 status = -EEXIST; 745 status = -ENOENT;
746 mlog_errno(status); 746 mlog_errno(status);
747 goto bail; 747 goto bail;
748 } 748 }
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 044013455621..857bbbcd39f3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
666 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 666 if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
667 ocfs2_local_alloc_count_bits(alloc)) { 667 ocfs2_local_alloc_count_bits(alloc)) {
668 ocfs2_error(osb->sb, "local alloc inode %llu says it has " 668 ocfs2_error(osb->sb, "local alloc inode %llu says it has "
669 "%u free bits, but a count shows %u", 669 "%u used bits, but a count shows %u",
670 (unsigned long long)le64_to_cpu(alloc->i_blkno), 670 (unsigned long long)le64_to_cpu(alloc->i_blkno),
671 le32_to_cpu(alloc->id1.bitmap1.i_used), 671 le32_to_cpu(alloc->id1.bitmap1.i_used),
672 ocfs2_local_alloc_count_bits(alloc)); 672 ocfs2_local_alloc_count_bits(alloc));
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
839 u32 *numbits, 839 u32 *numbits,
840 struct ocfs2_alloc_reservation *resv) 840 struct ocfs2_alloc_reservation *resv)
841{ 841{
842 int numfound, bitoff, left, startoff, lastzero; 842 int numfound = 0, bitoff, left, startoff, lastzero;
843 int local_resv = 0; 843 int local_resv = 0;
844 struct ocfs2_alloc_reservation r; 844 struct ocfs2_alloc_reservation r;
845 void *bitmap = NULL; 845 void *bitmap = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b5c3a5ea3ee6..09f90cbf0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2322 2322
2323 trace_ocfs2_orphan_del( 2323 trace_ocfs2_orphan_del(
2324 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, 2324 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2325 name, namelen); 2325 name, strlen(name));
2326 2326
2327 /* find it's spot in the orphan directory */ 2327 /* find it's spot in the orphan directory */
2328 status = ocfs2_find_entry(name, namelen, orphan_dir_inode, 2328 status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
2329 &lookup); 2329 &lookup);
2330 if (status) { 2330 if (status) {
2331 mlog_errno(status); 2331 mlog_errno(status);
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2808 ORPHAN_DIR_SYSTEM_INODE, 2808 ORPHAN_DIR_SYSTEM_INODE,
2809 osb->slot_num); 2809 osb->slot_num);
2810 if (!orphan_dir_inode) { 2810 if (!orphan_dir_inode) {
2811 status = -EEXIST; 2811 status = -ENOENT;
2812 mlog_errno(status); 2812 mlog_errno(status);
2813 goto leave; 2813 goto leave;
2814 } 2814 }
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee541f92dab4..df3a500789c7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4276 error = posix_acl_create(dir, &mode, &default_acl, &acl); 4276 error = posix_acl_create(dir, &mode, &default_acl, &acl);
4277 if (error) { 4277 if (error) {
4278 mlog_errno(error); 4278 mlog_errno(error);
4279 goto out; 4279 return error;
4280 } 4280 }
4281 4281
4282 error = ocfs2_create_inode_in_orphan(dir, mode, 4282 error = ocfs2_create_inode_in_orphan(dir, mode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d5493e361a38..e78a203d44c8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
427 if (!si) { 427 if (!si) {
428 status = -ENOMEM; 428 status = -ENOMEM;
429 mlog_errno(status); 429 mlog_errno(status);
430 goto bail; 430 return status;
431 } 431 }
432 432
433 si->si_extended = ocfs2_uses_extended_slot_map(osb); 433 si->si_extended = ocfs2_uses_extended_slot_map(osb);
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
452 452
453 osb->slot_info = (struct ocfs2_slot_info *)si; 453 osb->slot_info = (struct ocfs2_slot_info *)si;
454bail: 454bail:
455 if (status < 0 && si) 455 if (status < 0)
456 __ocfs2_free_slot_info(si); 456 __ocfs2_free_slot_info(si);
457 457
458 return status; 458 return status;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 1724d43d3da1..220cae7bbdbc 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void)
295 set_bit(node_num, netmap); 295 set_bit(node_num, netmap);
296 if (!memcmp(hbmap, netmap, sizeof(hbmap))) 296 if (!memcmp(hbmap, netmap, sizeof(hbmap)))
297 return 0; 297 return 0;
298 if (i < O2CB_MAP_STABILIZE_COUNT) 298 if (i < O2CB_MAP_STABILIZE_COUNT - 1)
299 msleep(1000); 299 msleep(1000);
300 } 300 }
301 301
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 720aa389e0ea..2768eb1da2b8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
1004 BUG_ON(conn == NULL); 1004 BUG_ON(conn == NULL);
1005 1005
1006 lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); 1006 lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
1007 if (!lc) { 1007 if (!lc)
1008 rc = -ENOMEM; 1008 return -ENOMEM;
1009 goto out;
1010 }
1011 1009
1012 init_waitqueue_head(&lc->oc_wait); 1010 init_waitqueue_head(&lc->oc_wait);
1013 init_completion(&lc->oc_sync_wait); 1011 init_completion(&lc->oc_sync_wait);
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
1063 } 1061 }
1064 1062
1065out: 1063out:
1066 if (rc && lc) 1064 if (rc)
1067 kfree(lc); 1065 kfree(lc);
1068 return rc; 1066 return rc;
1069} 1067}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0cb889a17ae1..4479029630bb 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2499 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); 2499 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
2500 if (status < 0) { 2500 if (status < 0) {
2501 mlog_errno(status); 2501 mlog_errno(status);
2502 ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
2503 start_bit, count);
2502 goto bail; 2504 goto bail;
2503 } 2505 }
2504 2506
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26675185b886..837ddce4b659 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1112 1112
1113 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, 1113 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
1114 ocfs2_debugfs_root); 1114 ocfs2_debugfs_root);
1115 if (!osb->osb_debug_root) { 1115 if (IS_ERR_OR_NULL(osb->osb_debug_root)) {
1116 status = -EINVAL; 1116 status = -EINVAL;
1117 mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); 1117 mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n");
1118 goto read_super_error; 1118 goto read_super_error;
@@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1122 osb->osb_debug_root, 1122 osb->osb_debug_root,
1123 osb, 1123 osb,
1124 &ocfs2_osb_debug_fops); 1124 &ocfs2_osb_debug_fops);
1125 if (!osb->osb_ctxt) { 1125 if (IS_ERR_OR_NULL(osb->osb_ctxt)) {
1126 status = -EINVAL; 1126 status = -EINVAL;
1127 mlog_errno(status); 1127 mlog_errno(status);
1128 goto read_super_error; 1128 goto read_super_error;
@@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void)
1606 } 1606 }
1607 1607
1608 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 1608 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
1609 if (!ocfs2_debugfs_root) { 1609 if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) {
1610 status = -ENOMEM; 1610 status = ocfs2_debugfs_root ?
1611 PTR_ERR(ocfs2_debugfs_root) : -ENOMEM;
1611 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1612 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1612 goto out4; 1613 goto out4;
1613 } 1614 }
@@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2069 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); 2070 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
2070 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); 2071 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
2071 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); 2072 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
2073 memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
2074 sizeof(di->id2.i_super.s_uuid));
2072 2075
2073 osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; 2076 osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
2074 2077
@@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
2333 mlog_errno(status); 2336 mlog_errno(status);
2334 goto bail; 2337 goto bail;
2335 } 2338 }
2336 cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); 2339 cleancache_init_shared_fs(sb);
2337 2340
2338bail: 2341bail:
2339 return status; 2342 return status;
@@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb)
2563 ocfs2_set_ro_flag(osb, 0); 2566 ocfs2_set_ro_flag(osb, 0);
2564} 2567}
2565 2568
2566static char error_buf[1024]; 2569void __ocfs2_error(struct super_block *sb, const char *function,
2567 2570 const char *fmt, ...)
2568void __ocfs2_error(struct super_block *sb,
2569 const char *function,
2570 const char *fmt, ...)
2571{ 2571{
2572 struct va_format vaf;
2572 va_list args; 2573 va_list args;
2573 2574
2574 va_start(args, fmt); 2575 va_start(args, fmt);
2575 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2576 vaf.fmt = fmt;
2576 va_end(args); 2577 vaf.va = &args;
2577 2578
2578 /* Not using mlog here because we want to show the actual 2579 /* Not using mlog here because we want to show the actual
2579 * function the error came from. */ 2580 * function the error came from. */
2580 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", 2581 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
2581 sb->s_id, function, error_buf); 2582 sb->s_id, function, &vaf);
2583
2584 va_end(args);
2582 2585
2583 ocfs2_handle_error(sb); 2586 ocfs2_handle_error(sb);
2584} 2587}
@@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb,
2586/* Handle critical errors. This is intentionally more drastic than 2589/* Handle critical errors. This is intentionally more drastic than
2587 * ocfs2_handle_error, so we only use for things like journal errors, 2590 * ocfs2_handle_error, so we only use for things like journal errors,
2588 * etc. */ 2591 * etc. */
2589void __ocfs2_abort(struct super_block* sb, 2592void __ocfs2_abort(struct super_block *sb, const char *function,
2590 const char *function,
2591 const char *fmt, ...) 2593 const char *fmt, ...)
2592{ 2594{
2595 struct va_format vaf;
2593 va_list args; 2596 va_list args;
2594 2597
2595 va_start(args, fmt); 2598 va_start(args, fmt);
2596 vsnprintf(error_buf, sizeof(error_buf), fmt, args);
2597 va_end(args);
2598 2599
2599 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", 2600 vaf.fmt = fmt;
2600 sb->s_id, function, error_buf); 2601 vaf.va = &args;
2602
2603 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
2604 sb->s_id, function, &vaf);
2605
2606 va_end(args);
2601 2607
2602 /* We don't have the cluster support yet to go straight to 2608 /* We don't have the cluster support yet to go straight to
2603 * hard readonly in here. Until then, we want to keep 2609 * hard readonly in here. Until then, we want to keep
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 85b190dc132f..4ca7533be479 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1238 i, 1238 i,
1239 &block_off, 1239 &block_off,
1240 &name_offset); 1240 &name_offset);
1241 if (ret) {
1242 mlog_errno(ret);
1243 goto cleanup;
1244 }
1241 xs->base = bucket_block(xs->bucket, block_off); 1245 xs->base = bucket_block(xs->bucket, block_off);
1242 } 1246 }
1243 if (ocfs2_xattr_is_local(xs->here)) { 1247 if (ocfs2_xattr_is_local(xs->here)) {
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5665 5669
5666 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5670 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5667 i, &xv, NULL); 5671 i, &xv, NULL);
5672 if (ret) {
5673 mlog_errno(ret);
5674 break;
5675 }
5668 5676
5669 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5677 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5670 args->ref_ci, 5678 args->ref_ci,
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..6a83c47d5904 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
570 uid = make_kuid(current_user_ns(), user); 570 uid = make_kuid(current_user_ns(), user);
571 gid = make_kgid(current_user_ns(), group); 571 gid = make_kgid(current_user_ns(), group);
572 572
573retry_deleg:
573 newattrs.ia_valid = ATTR_CTIME; 574 newattrs.ia_valid = ATTR_CTIME;
574 if (user != (uid_t) -1) { 575 if (user != (uid_t) -1) {
575 if (!uid_valid(uid)) 576 if (!uid_valid(uid))
@@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
586 if (!S_ISDIR(inode->i_mode)) 587 if (!S_ISDIR(inode->i_mode))
587 newattrs.ia_valid |= 588 newattrs.ia_valid |=
588 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 589 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
589retry_deleg:
590 mutex_lock(&inode->i_mutex); 590 mutex_lock(&inode->i_mutex);
591 error = security_path_chown(path, uid, gid); 591 error = security_path_chown(path, uid, gid);
592 if (!error) 592 if (!error)
@@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
988 return ERR_PTR(err); 988 return ERR_PTR(err);
989 if (flags & O_CREAT) 989 if (flags & O_CREAT)
990 return ERR_PTR(-EINVAL); 990 return ERR_PTR(-EINVAL);
991 if (!filename && (flags & O_DIRECTORY))
992 if (!dentry->d_inode->i_op->lookup)
993 return ERR_PTR(-ENOTDIR);
994 return do_file_open_root(dentry, mnt, filename, &op); 991 return do_file_open_root(dentry, mnt, filename, &op);
995} 992}
996EXPORT_SYMBOL(file_open_root); 993EXPORT_SYMBOL(file_open_root);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b90952f528b1..5f0d1993e6e3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
529{ 529{
530 struct ovl_fs *ufs = sb->s_fs_info; 530 struct ovl_fs *ufs = sb->s_fs_info;
531 531
532 if (!(*flags & MS_RDONLY) && 532 if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
533 (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
534 return -EROFS; 533 return -EROFS;
535 534
536 return 0; 535 return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
615 break; 614 break;
616 615
617 default: 616 default:
617 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
618 return -EINVAL; 618 return -EINVAL;
619 } 619 }
620 } 620 }
621
622 /* Workdir is useless in non-upper mount */
623 if (!config->upperdir && config->workdir) {
624 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
625 config->workdir);
626 kfree(config->workdir);
627 config->workdir = NULL;
628 }
629
621 return 0; 630 return 0;
622} 631}
623 632
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
837 846
838 sb->s_stack_depth = 0; 847 sb->s_stack_depth = 0;
839 if (ufs->config.upperdir) { 848 if (ufs->config.upperdir) {
840 /* FIXME: workdir is not needed for a R/O mount */
841 if (!ufs->config.workdir) { 849 if (!ufs->config.workdir) {
842 pr_err("overlayfs: missing 'workdir'\n"); 850 pr_err("overlayfs: missing 'workdir'\n");
843 goto out_free_config; 851 goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
847 if (err) 855 if (err)
848 goto out_free_config; 856 goto out_free_config;
849 857
858 /* Upper fs should not be r/o */
859 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
860 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
861 err = -EINVAL;
862 goto out_put_upperpath;
863 }
864
850 err = ovl_mount_dir(ufs->config.workdir, &workpath); 865 err = ovl_mount_dir(ufs->config.workdir, &workpath);
851 if (err) 866 if (err)
852 goto out_put_upperpath; 867 goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
869 884
870 err = -EINVAL; 885 err = -EINVAL;
871 stacklen = ovl_split_lowerdirs(lowertmp); 886 stacklen = ovl_split_lowerdirs(lowertmp);
872 if (stacklen > OVL_MAX_STACK) 887 if (stacklen > OVL_MAX_STACK) {
888 pr_err("overlayfs: too many lower directries, limit is %d\n",
889 OVL_MAX_STACK);
873 goto out_free_lowertmp; 890 goto out_free_lowertmp;
891 } else if (!ufs->config.upperdir && stacklen == 1) {
892 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
893 goto out_free_lowertmp;
894 }
874 895
875 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 896 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
876 if (!stack) 897 if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
932 ufs->numlower++; 953 ufs->numlower++;
933 } 954 }
934 955
935 /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ 956 /* If the upper fs is nonexistent, we mark overlayfs r/o too */
936 if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) 957 if (!ufs->upper_mnt)
937 sb->s_flags |= MS_RDONLY; 958 sb->s_flags |= MS_RDONLY;
938 959
939 sb->s_d_op = &ovl_dentry_operations; 960 sb->s_d_op = &ovl_dentry_operations;
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e58e2a6..2d084f2d0b83 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23#include <linux/fcntl.h> 23#include <linux/fcntl.h>
24#include <linux/aio.h>
25 24
26#include <asm/uaccess.h> 25#include <asm/uaccess.h>
27#include <asm/ioctls.h> 26#include <asm/ioctls.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
1325 1325
1326static int pagemap_open(struct inode *inode, struct file *file) 1326static int pagemap_open(struct inode *inode, struct file *file)
1327{ 1327{
1328 /* do not disclose physical addresses: attack vector */
1329 if (!capable(CAP_SYS_ADMIN))
1330 return -EPERM;
1328 pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " 1331 pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
1329 "to stop being page-shift some time soon. See the " 1332 "to stop being page-shift some time soon. See the "
1330 "linux/Documentation/vm/pagemap.txt for details.\n"); 1333 "linux/Documentation/vm/pagemap.txt for details.\n");
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
539 mem_address = pdata->mem_address; 539 mem_address = pdata->mem_address;
540 record_size = pdata->record_size; 540 record_size = pdata->record_size;
541 dump_oops = pdata->dump_oops; 541 dump_oops = pdata->dump_oops;
542 ramoops_console_size = pdata->console_size;
543 ramoops_pmsg_size = pdata->pmsg_size;
544 ramoops_ftrace_size = pdata->ftrace_size;
542 545
543 pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", 546 pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
544 cxt->size, (unsigned long long)cxt->phys_addr, 547 cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b68786d66..69128b378646 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
9#include <linux/fcntl.h> 9#include <linux/fcntl.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/aio.h>
13#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
14#include <linux/security.h> 13#include <linux/security.h>
15#include <linux/export.h> 14#include <linux/export.h>
@@ -343,13 +342,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
343 342
344 init_sync_kiocb(&kiocb, file); 343 init_sync_kiocb(&kiocb, file);
345 kiocb.ki_pos = *ppos; 344 kiocb.ki_pos = *ppos;
346 kiocb.ki_nbytes = iov_iter_count(iter);
347 345
348 iter->type |= READ; 346 iter->type |= READ;
349 ret = file->f_op->read_iter(&kiocb, iter); 347 ret = file->f_op->read_iter(&kiocb, iter);
350 if (ret == -EIOCBQUEUED) 348 BUG_ON(ret == -EIOCBQUEUED);
351 ret = wait_on_sync_kiocb(&kiocb);
352
353 if (ret > 0) 349 if (ret > 0)
354 *ppos = kiocb.ki_pos; 350 *ppos = kiocb.ki_pos;
355 return ret; 351 return ret;
@@ -366,13 +362,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
366 362
367 init_sync_kiocb(&kiocb, file); 363 init_sync_kiocb(&kiocb, file);
368 kiocb.ki_pos = *ppos; 364 kiocb.ki_pos = *ppos;
369 kiocb.ki_nbytes = iov_iter_count(iter);
370 365
371 iter->type |= WRITE; 366 iter->type |= WRITE;
372 ret = file->f_op->write_iter(&kiocb, iter); 367 ret = file->f_op->write_iter(&kiocb, iter);
373 if (ret == -EIOCBQUEUED) 368 BUG_ON(ret == -EIOCBQUEUED);
374 ret = wait_on_sync_kiocb(&kiocb);
375
376 if (ret > 0) 369 if (ret > 0)
377 *ppos = kiocb.ki_pos; 370 *ppos = kiocb.ki_pos;
378 return ret; 371 return ret;
@@ -426,11 +419,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
426 419
427 init_sync_kiocb(&kiocb, filp); 420 init_sync_kiocb(&kiocb, filp);
428 kiocb.ki_pos = *ppos; 421 kiocb.ki_pos = *ppos;
429 kiocb.ki_nbytes = len;
430 422
431 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 423 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
432 if (-EIOCBQUEUED == ret) 424 BUG_ON(ret == -EIOCBQUEUED);
433 ret = wait_on_sync_kiocb(&kiocb);
434 *ppos = kiocb.ki_pos; 425 *ppos = kiocb.ki_pos;
435 return ret; 426 return ret;
436} 427}
@@ -446,12 +437,10 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
446 437
447 init_sync_kiocb(&kiocb, filp); 438 init_sync_kiocb(&kiocb, filp);
448 kiocb.ki_pos = *ppos; 439 kiocb.ki_pos = *ppos;
449 kiocb.ki_nbytes = len;
450 iov_iter_init(&iter, READ, &iov, 1, len); 440 iov_iter_init(&iter, READ, &iov, 1, len);
451 441
452 ret = filp->f_op->read_iter(&kiocb, &iter); 442 ret = filp->f_op->read_iter(&kiocb, &iter);
453 if (-EIOCBQUEUED == ret) 443 BUG_ON(ret == -EIOCBQUEUED);
454 ret = wait_on_sync_kiocb(&kiocb);
455 *ppos = kiocb.ki_pos; 444 *ppos = kiocb.ki_pos;
456 return ret; 445 return ret;
457} 446}
@@ -510,11 +499,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
510 499
511 init_sync_kiocb(&kiocb, filp); 500 init_sync_kiocb(&kiocb, filp);
512 kiocb.ki_pos = *ppos; 501 kiocb.ki_pos = *ppos;
513 kiocb.ki_nbytes = len;
514 502
515 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 503 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
516 if (-EIOCBQUEUED == ret) 504 BUG_ON(ret == -EIOCBQUEUED);
517 ret = wait_on_sync_kiocb(&kiocb);
518 *ppos = kiocb.ki_pos; 505 *ppos = kiocb.ki_pos;
519 return ret; 506 return ret;
520} 507}
@@ -530,12 +517,10 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
530 517
531 init_sync_kiocb(&kiocb, filp); 518 init_sync_kiocb(&kiocb, filp);
532 kiocb.ki_pos = *ppos; 519 kiocb.ki_pos = *ppos;
533 kiocb.ki_nbytes = len;
534 iov_iter_init(&iter, WRITE, &iov, 1, len); 520 iov_iter_init(&iter, WRITE, &iov, 1, len);
535 521
536 ret = filp->f_op->write_iter(&kiocb, &iter); 522 ret = filp->f_op->write_iter(&kiocb, &iter);
537 if (-EIOCBQUEUED == ret) 523 BUG_ON(ret == -EIOCBQUEUED);
538 ret = wait_on_sync_kiocb(&kiocb);
539 *ppos = kiocb.ki_pos; 524 *ppos = kiocb.ki_pos;
540 return ret; 525 return ret;
541} 526}
@@ -710,60 +695,47 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
710} 695}
711EXPORT_SYMBOL(iov_shorten); 696EXPORT_SYMBOL(iov_shorten);
712 697
713static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, 698static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
714 unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) 699 loff_t *ppos, iter_fn_t fn)
715{ 700{
716 struct kiocb kiocb; 701 struct kiocb kiocb;
717 struct iov_iter iter;
718 ssize_t ret; 702 ssize_t ret;
719 703
720 init_sync_kiocb(&kiocb, filp); 704 init_sync_kiocb(&kiocb, filp);
721 kiocb.ki_pos = *ppos; 705 kiocb.ki_pos = *ppos;
722 kiocb.ki_nbytes = len;
723 706
724 iov_iter_init(&iter, rw, iov, nr_segs, len); 707 ret = fn(&kiocb, iter);
725 ret = fn(&kiocb, &iter); 708 BUG_ON(ret == -EIOCBQUEUED);
726 if (ret == -EIOCBQUEUED)
727 ret = wait_on_sync_kiocb(&kiocb);
728 *ppos = kiocb.ki_pos; 709 *ppos = kiocb.ki_pos;
729 return ret; 710 return ret;
730} 711}
731 712
732static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 713static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter,
733 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 714 loff_t *ppos, iov_fn_t fn)
734{ 715{
735 struct kiocb kiocb; 716 struct kiocb kiocb;
736 ssize_t ret; 717 ssize_t ret;
737 718
738 init_sync_kiocb(&kiocb, filp); 719 init_sync_kiocb(&kiocb, filp);
739 kiocb.ki_pos = *ppos; 720 kiocb.ki_pos = *ppos;
740 kiocb.ki_nbytes = len;
741 721
742 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 722 ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos);
743 if (ret == -EIOCBQUEUED) 723 BUG_ON(ret == -EIOCBQUEUED);
744 ret = wait_on_sync_kiocb(&kiocb);
745 *ppos = kiocb.ki_pos; 724 *ppos = kiocb.ki_pos;
746 return ret; 725 return ret;
747} 726}
748 727
749/* Do it by hand, with file-ops */ 728/* Do it by hand, with file-ops */
750static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 729static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
751 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 730 loff_t *ppos, io_fn_t fn)
752{ 731{
753 struct iovec *vector = iov;
754 ssize_t ret = 0; 732 ssize_t ret = 0;
755 733
756 while (nr_segs > 0) { 734 while (iov_iter_count(iter)) {
757 void __user *base; 735 struct iovec iovec = iov_iter_iovec(iter);
758 size_t len;
759 ssize_t nr; 736 ssize_t nr;
760 737
761 base = vector->iov_base; 738 nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
762 len = vector->iov_len;
763 vector++;
764 nr_segs--;
765
766 nr = fn(filp, base, len, ppos);
767 739
768 if (nr < 0) { 740 if (nr < 0) {
769 if (!ret) 741 if (!ret)
@@ -771,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
771 break; 743 break;
772 } 744 }
773 ret += nr; 745 ret += nr;
774 if (nr != len) 746 if (nr != iovec.iov_len)
775 break; 747 break;
748 iov_iter_advance(iter, nr);
776 } 749 }
777 750
778 return ret; 751 return ret;
@@ -863,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file,
863 size_t tot_len; 836 size_t tot_len;
864 struct iovec iovstack[UIO_FASTIOV]; 837 struct iovec iovstack[UIO_FASTIOV];
865 struct iovec *iov = iovstack; 838 struct iovec *iov = iovstack;
839 struct iov_iter iter;
866 ssize_t ret; 840 ssize_t ret;
867 io_fn_t fn; 841 io_fn_t fn;
868 iov_fn_t fnv; 842 iov_fn_t fnv;
869 iter_fn_t iter_fn; 843 iter_fn_t iter_fn;
870 844
871 ret = rw_copy_check_uvector(type, uvector, nr_segs, 845 ret = import_iovec(type, uvector, nr_segs,
872 ARRAY_SIZE(iovstack), iovstack, &iov); 846 ARRAY_SIZE(iovstack), &iov, &iter);
873 if (ret <= 0) 847 if (ret < 0)
874 goto out; 848 return ret;
875 849
876 tot_len = ret; 850 tot_len = iov_iter_count(&iter);
851 if (!tot_len)
852 goto out;
877 ret = rw_verify_area(type, file, pos, tot_len); 853 ret = rw_verify_area(type, file, pos, tot_len);
878 if (ret < 0) 854 if (ret < 0)
879 goto out; 855 goto out;
@@ -891,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file,
891 } 867 }
892 868
893 if (iter_fn) 869 if (iter_fn)
894 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 870 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
895 pos, iter_fn);
896 else if (fnv) 871 else if (fnv)
897 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 872 ret = do_sync_readv_writev(file, &iter, pos, fnv);
898 pos, fnv);
899 else 873 else
900 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 874 ret = do_loop_readv_writev(file, &iter, pos, fn);
901 875
902 if (type != READ) 876 if (type != READ)
903 file_end_write(file); 877 file_end_write(file);
904 878
905out: 879out:
906 if (iov != iovstack) 880 kfree(iov);
907 kfree(iov);
908 if ((ret + (type == READ)) > 0) { 881 if ((ret + (type == READ)) > 0) {
909 if (type == READ) 882 if (type == READ)
910 fsnotify_access(file); 883 fsnotify_access(file);
@@ -1043,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1043 compat_ssize_t tot_len; 1016 compat_ssize_t tot_len;
1044 struct iovec iovstack[UIO_FASTIOV]; 1017 struct iovec iovstack[UIO_FASTIOV];
1045 struct iovec *iov = iovstack; 1018 struct iovec *iov = iovstack;
1019 struct iov_iter iter;
1046 ssize_t ret; 1020 ssize_t ret;
1047 io_fn_t fn; 1021 io_fn_t fn;
1048 iov_fn_t fnv; 1022 iov_fn_t fnv;
1049 iter_fn_t iter_fn; 1023 iter_fn_t iter_fn;
1050 1024
1051 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 1025 ret = compat_import_iovec(type, uvector, nr_segs,
1052 UIO_FASTIOV, iovstack, &iov); 1026 UIO_FASTIOV, &iov, &iter);
1053 if (ret <= 0) 1027 if (ret < 0)
1054 goto out; 1028 return ret;
1055 1029
1056 tot_len = ret; 1030 tot_len = iov_iter_count(&iter);
1031 if (!tot_len)
1032 goto out;
1057 ret = rw_verify_area(type, file, pos, tot_len); 1033 ret = rw_verify_area(type, file, pos, tot_len);
1058 if (ret < 0) 1034 if (ret < 0)
1059 goto out; 1035 goto out;
@@ -1071,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1071 } 1047 }
1072 1048
1073 if (iter_fn) 1049 if (iter_fn)
1074 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 1050 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
1075 pos, iter_fn);
1076 else if (fnv) 1051 else if (fnv)
1077 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 1052 ret = do_sync_readv_writev(file, &iter, pos, fnv);
1078 pos, fnv);
1079 else 1053 else
1080 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 1054 ret = do_loop_readv_writev(file, &iter, pos, fn);
1081 1055
1082 if (type != READ) 1056 if (type != READ)
1083 file_end_write(file); 1057 file_end_write(file);
1084 1058
1085out: 1059out:
1086 if (iov != iovstack) 1060 kfree(iov);
1087 kfree(iov);
1088 if ((ret + (type == READ)) > 0) { 1061 if ((ret + (type == READ)) > 0) {
1089 if (type == READ) 1062 if (type == READ)
1090 fsnotify_access(file); 1063 fsnotify_access(file);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e1f995..9312b7842e03 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/aio.h> 21#include <linux/uio.h>
22 22
23int reiserfs_commit_write(struct file *f, struct page *page, 23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
diff --git a/fs/splice.c b/fs/splice.c
index 7968da96bebb..41cbb16299e0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include "internal.h" 35#include "internal.h"
37 36
38/* 37/*
@@ -1534,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1534 struct iovec iovstack[UIO_FASTIOV]; 1533 struct iovec iovstack[UIO_FASTIOV];
1535 struct iovec *iov = iovstack; 1534 struct iovec *iov = iovstack;
1536 struct iov_iter iter; 1535 struct iov_iter iter;
1537 ssize_t count;
1538 1536
1539 pipe = get_pipe_info(file); 1537 pipe = get_pipe_info(file);
1540 if (!pipe) 1538 if (!pipe)
1541 return -EBADF; 1539 return -EBADF;
1542 1540
1543 ret = rw_copy_check_uvector(READ, uiov, nr_segs, 1541 ret = import_iovec(READ, uiov, nr_segs,
1544 ARRAY_SIZE(iovstack), iovstack, &iov); 1542 ARRAY_SIZE(iovstack), &iov, &iter);
1545 if (ret <= 0) 1543 if (ret < 0)
1546 goto out; 1544 return ret;
1547
1548 count = ret;
1549 iov_iter_init(&iter, READ, iov, nr_segs, count);
1550 1545
1546 sd.total_len = iov_iter_count(&iter);
1551 sd.len = 0; 1547 sd.len = 0;
1552 sd.total_len = count;
1553 sd.flags = flags; 1548 sd.flags = flags;
1554 sd.u.data = &iter; 1549 sd.u.data = &iter;
1555 sd.pos = 0; 1550 sd.pos = 0;
1556 1551
1557 pipe_lock(pipe); 1552 if (sd.total_len) {
1558 ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1553 pipe_lock(pipe);
1559 pipe_unlock(pipe); 1554 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
1560 1555 pipe_unlock(pipe);
1561out: 1556 }
1562 if (iov != iovstack)
1563 kfree(iov);
1564 1557
1558 kfree(iov);
1565 return ret; 1559 return ret;
1566} 1560}
1567 1561
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3cef9927..19636af5e75c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
66{ 66{
67 int retval; 67 int retval;
68 68
69 retval = security_inode_getattr(path->mnt, path->dentry); 69 retval = security_inode_getattr(path);
70 if (retval) 70 if (retval)
71 return retval; 71 return retval;
72 return vfs_getattr_nosec(path, stat); 72 return vfs_getattr_nosec(path, stat);
diff --git a/fs/super.c b/fs/super.c
index 2b7dc90ccdbb..928c20f47af9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
224 s->s_maxbytes = MAX_NON_LFS; 224 s->s_maxbytes = MAX_NON_LFS;
225 s->s_op = &default_op; 225 s->s_op = &default_op;
226 s->s_time_gran = 1000000000; 226 s->s_time_gran = 1000000000;
227 s->cleancache_poolid = -1; 227 s->cleancache_poolid = CLEANCACHE_NO_POOL;
228 228
229 s->s_shrink.seeks = DEFAULT_SEEKS; 229 s->s_shrink.seeks = DEFAULT_SEEKS;
230 s->s_shrink.scan_objects = super_cache_scan; 230 s->s_shrink.scan_objects = super_cache_scan;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2554d8835b48..b400c04371f0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
41 41
42 if (grp->attrs) { 42 if (grp->attrs) {
43 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { 43 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
44 umode_t mode = 0; 44 umode_t mode = (*attr)->mode;
45 45
46 /* 46 /*
47 * In update mode, we're changing the permissions or 47 * In update mode, we're changing the permissions or
@@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
55 if (!mode) 55 if (!mode)
56 continue; 56 continue;
57 } 57 }
58
59 WARN(mode & ~(SYSFS_PREALLOC | 0664),
60 "Attribute %s: Invalid permissions 0%o\n",
61 (*attr)->name, mode);
62
63 mode &= SYSFS_PREALLOC | 0664;
58 error = sysfs_add_file_mode_ns(parent, *attr, false, 64 error = sysfs_add_file_mode_ns(parent, *attr, false,
59 (*attr)->mode | mode, 65 mode, NULL);
60 NULL);
61 if (unlikely(error)) 66 if (unlikely(error))
62 break; 67 break;
63 } 68 }
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
1tracefs-objs := inode.o
2
3obj-$(CONFIG_TRACING) += tracefs.o
4
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
1/*
2 * inode.c - part of tracefs, a pseudo file system for activating tracing
3 *
4 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
5 *
6 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * tracefs is the file system that is used by the tracing infrastructure.
13 *
14 */
15
16#include <linux/module.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/kobject.h>
20#include <linux/namei.h>
21#include <linux/tracefs.h>
22#include <linux/fsnotify.h>
23#include <linux/seq_file.h>
24#include <linux/parser.h>
25#include <linux/magic.h>
26#include <linux/slab.h>
27
28#define TRACEFS_DEFAULT_MODE 0700
29
30static struct vfsmount *tracefs_mount;
31static int tracefs_mount_count;
32static bool tracefs_registered;
33
/* Fallback ->read for tracefs files created without fops: always EOF. */
static ssize_t default_read_file(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
{
	return 0;
}
39
/* Fallback ->write: discard the data but report it all as written. */
static ssize_t default_write_file(struct file *file, const char __user *buf,
				  size_t count, loff_t *ppos)
{
	return count;
}
45
46static const struct file_operations tracefs_file_operations = {
47 .read = default_read_file,
48 .write = default_write_file,
49 .open = simple_open,
50 .llseek = noop_llseek,
51};
52
53static struct tracefs_dir_ops {
54 int (*mkdir)(const char *name);
55 int (*rmdir)(const char *name);
56} tracefs_ops;
57
58static char *get_dname(struct dentry *dentry)
59{
60 const char *dname;
61 char *name;
62 int len = dentry->d_name.len;
63
64 dname = dentry->d_name.name;
65 name = kmalloc(len + 1, GFP_KERNEL);
66 if (!name)
67 return NULL;
68 memcpy(name, dname, len);
69 name[len] = 0;
70 return name;
71}
72
/*
 * ->mkdir for the "instances" directory: forward the new directory's
 * name to the callback registered via tracefs_create_instance_dir().
 *
 * Returns -ENOMEM if the name cannot be copied, otherwise whatever the
 * registered mkdir callback returns.
 */
static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
{
	char *name;
	int ret;

	name = get_dname(dentry);
	if (!name)
		return -ENOMEM;

	/*
	 * The mkdir call can call the generic functions that create
	 * the files within the tracefs system. It is up to the individual
	 * mkdir routine to handle races.
	 */
	mutex_unlock(&inode->i_mutex);
	ret = tracefs_ops.mkdir(name);
	mutex_lock(&inode->i_mutex);

	kfree(name);

	return ret;
}
95
/*
 * ->rmdir for the "instances" directory: forward the victim directory's
 * name to the callback registered via tracefs_create_instance_dir().
 *
 * Returns -ENOMEM if the name cannot be copied, otherwise the callback's
 * return value.
 */
static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
{
	char *name;
	int ret;

	name = get_dname(dentry);
	if (!name)
		return -ENOMEM;

	/*
	 * The rmdir call can call the generic functions that create
	 * the files within the tracefs system. It is up to the individual
	 * rmdir routine to handle races.
	 * This time we need to unlock not only the parent (inode) but
	 * also the directory that is being deleted.
	 */
	mutex_unlock(&inode->i_mutex);
	mutex_unlock(&dentry->d_inode->i_mutex);

	ret = tracefs_ops.rmdir(name);

	/* Retake both locks in parent-then-child order to avoid deadlock. */
	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
	mutex_lock(&dentry->d_inode->i_mutex);

	kfree(name);

	return ret;
}
124
125static const struct inode_operations tracefs_dir_inode_operations = {
126 .lookup = simple_lookup,
127 .mkdir = tracefs_syscall_mkdir,
128 .rmdir = tracefs_syscall_rmdir,
129};
130
131static struct inode *tracefs_get_inode(struct super_block *sb)
132{
133 struct inode *inode = new_inode(sb);
134 if (inode) {
135 inode->i_ino = get_next_ino();
136 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
137 }
138 return inode;
139}
140
141struct tracefs_mount_opts {
142 kuid_t uid;
143 kgid_t gid;
144 umode_t mode;
145};
146
147enum {
148 Opt_uid,
149 Opt_gid,
150 Opt_mode,
151 Opt_err
152};
153
154static const match_table_t tokens = {
155 {Opt_uid, "uid=%u"},
156 {Opt_gid, "gid=%u"},
157 {Opt_mode, "mode=%o"},
158 {Opt_err, NULL}
159};
160
161struct tracefs_fs_info {
162 struct tracefs_mount_opts mount_opts;
163};
164
/*
 * Parse the "uid=", "gid=" and "mode=" mount options from @data into
 * @opts.  @data may be NULL (no options).  opts->mode is always reset
 * to the default; uid/gid are written only when the matching option is
 * present and maps to a valid id in the current user namespace.
 *
 * Returns 0 on success or -EINVAL for a malformed/invalid value.
 */
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
	substring_t args[MAX_OPT_ARGS];
	int option;
	int token;
	kuid_t uid;
	kgid_t gid;
	char *p;

	opts->mode = TRACEFS_DEFAULT_MODE;

	while ((p = strsep(&data, ",")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_uid:
			if (match_int(&args[0], &option))
				return -EINVAL;
			uid = make_kuid(current_user_ns(), option);
			if (!uid_valid(uid))
				return -EINVAL;
			opts->uid = uid;
			break;
		case Opt_gid:
			if (match_int(&args[0], &option))
				return -EINVAL;
			gid = make_kgid(current_user_ns(), option);
			if (!gid_valid(gid))
				return -EINVAL;
			opts->gid = gid;
			break;
		case Opt_mode:
			if (match_octal(&args[0], &option))
				return -EINVAL;
			/* Keep only the permission bits. */
			opts->mode = option & S_IALLUGO;
			break;
		/*
		 * We might like to report bad mount options here;
		 * but traditionally tracefs has ignored all mount options
		 */
		}
	}

	return 0;
}
212
213static int tracefs_apply_options(struct super_block *sb)
214{
215 struct tracefs_fs_info *fsi = sb->s_fs_info;
216 struct inode *inode = sb->s_root->d_inode;
217 struct tracefs_mount_opts *opts = &fsi->mount_opts;
218
219 inode->i_mode &= ~S_IALLUGO;
220 inode->i_mode |= opts->mode;
221
222 inode->i_uid = opts->uid;
223 inode->i_gid = opts->gid;
224
225 return 0;
226}
227
228static int tracefs_remount(struct super_block *sb, int *flags, char *data)
229{
230 int err;
231 struct tracefs_fs_info *fsi = sb->s_fs_info;
232
233 sync_filesystem(sb);
234 err = tracefs_parse_options(data, &fsi->mount_opts);
235 if (err)
236 goto fail;
237
238 tracefs_apply_options(sb);
239
240fail:
241 return err;
242}
243
244static int tracefs_show_options(struct seq_file *m, struct dentry *root)
245{
246 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
247 struct tracefs_mount_opts *opts = &fsi->mount_opts;
248
249 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
250 seq_printf(m, ",uid=%u",
251 from_kuid_munged(&init_user_ns, opts->uid));
252 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
253 seq_printf(m, ",gid=%u",
254 from_kgid_munged(&init_user_ns, opts->gid));
255 if (opts->mode != TRACEFS_DEFAULT_MODE)
256 seq_printf(m, ",mode=%o", opts->mode);
257
258 return 0;
259}
260
261static const struct super_operations tracefs_super_operations = {
262 .statfs = simple_statfs,
263 .remount_fs = tracefs_remount,
264 .show_options = tracefs_show_options,
265};
266
/*
 * Fill a tracefs superblock: allocate per-sb option state, parse the
 * mount options, create the (initially empty) root directory and apply
 * the options to it.  On failure the fs_info is freed here; teardown of
 * the superblock itself is handled by mount_single()'s caller.
 */
static int trace_fill_super(struct super_block *sb, void *data, int silent)
{
	/* No static files: the empty tree_descr only creates the root. */
	static struct tree_descr trace_files[] = {{""}};
	struct tracefs_fs_info *fsi;
	int err;

	save_mount_options(sb, data);

	fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
	sb->s_fs_info = fsi;
	if (!fsi) {
		err = -ENOMEM;
		goto fail;
	}

	err = tracefs_parse_options(data, &fsi->mount_opts);
	if (err)
		goto fail;

	err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
	if (err)
		goto fail;

	sb->s_op = &tracefs_super_operations;

	tracefs_apply_options(sb);

	return 0;

fail:
	kfree(fsi);
	sb->s_fs_info = NULL;
	return err;
}
301
302static struct dentry *trace_mount(struct file_system_type *fs_type,
303 int flags, const char *dev_name,
304 void *data)
305{
306 return mount_single(fs_type, flags, data, trace_fill_super);
307}
308
309static struct file_system_type trace_fs_type = {
310 .owner = THIS_MODULE,
311 .name = "tracefs",
312 .mount = trace_mount,
313 .kill_sb = kill_litter_super,
314};
315MODULE_ALIAS_FS("tracefs");
316
/*
 * Pin the tracefs filesystem and look up a new negative dentry @name
 * under @parent (or under the tracefs root when @parent is NULL).
 *
 * On success the parent's i_mutex is returned HELD; the caller must
 * finish with end_creating() or failed_creating().  Returns
 * ERR_PTR(-EEXIST) if the name already exists, or another ERR_PTR on
 * failure (in which case the lock has been dropped).
 */
static struct dentry *start_creating(const char *name, struct dentry *parent)
{
	struct dentry *dentry;
	int error;

	pr_debug("tracefs: creating file '%s'\n",name);

	error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
			      &tracefs_mount_count);
	if (error)
		return ERR_PTR(error);

	/* If the parent is not specified, we create it in the root.
	 * We need the root dentry to do this, which is in the super
	 * block. A pointer to that is in the struct vfsmount that we
	 * have around.
	 */
	if (!parent)
		parent = tracefs_mount->mnt_root;

	mutex_lock(&parent->d_inode->i_mutex);
	dentry = lookup_one_len(name, parent, strlen(name));
	if (!IS_ERR(dentry) && dentry->d_inode) {
		/* Positive dentry means the name is already taken. */
		dput(dentry);
		dentry = ERR_PTR(-EEXIST);
	}
	if (IS_ERR(dentry))
		mutex_unlock(&parent->d_inode->i_mutex);
	return dentry;
}
347
/*
 * Error-path counterpart of start_creating(): drop the parent's i_mutex,
 * the dentry reference and the filesystem pin.  Always returns NULL so
 * callers can simply "return failed_creating(dentry);".
 */
static struct dentry *failed_creating(struct dentry *dentry)
{
	mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
	dput(dentry);
	simple_release_fs(&tracefs_mount, &tracefs_mount_count);
	return NULL;
}
355
/* Success-path counterpart of start_creating(): release the parent lock. */
static struct dentry *end_creating(struct dentry *dentry)
{
	mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
	return dentry;
}
361
/**
 * tracefs_create_file - create a file in the tracefs filesystem
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have.
 * @parent: a pointer to the parent dentry for this file. This should be a
 *	  directory dentry if set. If this parameter is NULL, then the
 *	  file will be created in the root of the tracefs filesystem.
 * @data: a pointer to something that the caller will want to get to later
 *        on. The inode.i_private pointer will point to this value on
 *        the open() call.
 * @fops: a pointer to a struct file_operations that should be used for
 *        this file.
 *
 * This is the basic "create a file" function for tracefs. It allows for a
 * wide range of flexibility in creating a file, or a directory (if you want
 * to create a directory, the tracefs_create_dir() function is
 * recommended to be used instead.)
 *
 * This function will return a pointer to a dentry if it succeeds. This
 * pointer must be passed to the tracefs_remove() function when the file is
 * to be removed (no automatic cleanup happens if your module is unloaded,
 * you are responsible here.) If an error occurs, %NULL will be returned.
 *
 * If tracefs is not enabled in the kernel, the value -%ENODEV will be
 * returned.
 */
struct dentry *tracefs_create_file(const char *name, umode_t mode,
				   struct dentry *parent, void *data,
				   const struct file_operations *fops)
{
	struct dentry *dentry;
	struct inode *inode;

	/* Only regular files are allowed; default the type bits if unset. */
	if (!(mode & S_IFMT))
		mode |= S_IFREG;
	BUG_ON(!S_ISREG(mode));
	dentry = start_creating(name, parent);

	if (IS_ERR(dentry))
		return NULL;

	inode = tracefs_get_inode(dentry->d_sb);
	if (unlikely(!inode))
		return failed_creating(dentry);

	inode->i_mode = mode;
	/* NULL fops falls back to the no-op default operations. */
	inode->i_fop = fops ? fops : &tracefs_file_operations;
	inode->i_private = data;
	d_instantiate(dentry, inode);
	fsnotify_create(dentry->d_parent->d_inode, dentry);
	return end_creating(dentry);
}
414
/*
 * Common worker for directory creation: allocate the inode, wire up
 * @ops plus the simple dir fops, and fix up link counts for "." and
 * "..".  Returns the new dentry, or NULL on failure.
 */
static struct dentry *__create_dir(const char *name, struct dentry *parent,
				   const struct inode_operations *ops)
{
	struct dentry *dentry = start_creating(name, parent);
	struct inode *inode;

	if (IS_ERR(dentry))
		return NULL;

	inode = tracefs_get_inode(dentry->d_sb);
	if (unlikely(!inode))
		return failed_creating(dentry);

	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
	inode->i_op = ops;
	inode->i_fop = &simple_dir_operations;

	/* directory inodes start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	d_instantiate(dentry, inode);
	/* the new directory's ".." adds a link to the parent */
	inc_nlink(dentry->d_parent->d_inode);
	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
	return end_creating(dentry);
}
439
/**
 * tracefs_create_dir - create a directory in the tracefs filesystem
 * @name: a pointer to a string containing the name of the directory to
 *        create.
 * @parent: a pointer to the parent dentry for this file. This should be a
 *          directory dentry if set. If this parameter is NULL, then the
 *          directory will be created in the root of the tracefs filesystem.
 *
 * This function creates a directory in tracefs with the given name.
 *
 * This function will return a pointer to a dentry if it succeeds. This
 * pointer must be passed to the tracefs_remove() function when the file is
 * to be removed. If an error occurs, %NULL will be returned.
 *
 * If tracing is not enabled in the kernel, the value -%ENODEV will be
 * returned.
 */
struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
{
	/* Plain directory: no userspace mkdir/rmdir support. */
	return __create_dir(name, parent, &simple_dir_inode_operations);
}
461
/**
 * tracefs_create_instance_dir - create the tracing instances directory
 * @name: The name of the instances directory to create
 * @parent: The parent directory that the instances directory will exist
 * @mkdir: The function to call when a mkdir is performed.
 * @rmdir: The function to call when a rmdir is performed.
 *
 * Only one instances directory is allowed.
 *
 * The instances directory is special as it allows for mkdir and rmdir
 * to be done by userspace. When a mkdir or rmdir is performed, the inode
 * locks are released and the methods passed in (@mkdir and @rmdir) are
 * called without locks and with the name of the directory being created
 * within the instances directory.
 *
 * Returns the dentry of the instances directory.
 */
struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
					   int (*mkdir)(const char *name),
					   int (*rmdir)(const char *name))
{
	struct dentry *dentry;

	/* Only allow one instance of the instances directory. */
	if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
		return NULL;

	dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
	if (!dentry)
		return NULL;

	/* Publish the callbacks used by tracefs_syscall_mkdir/rmdir(). */
	tracefs_ops.mkdir = mkdir;
	tracefs_ops.rmdir = rmdir;

	return dentry;
}
498
/* True when @dentry is live: has an inode and has not been unhashed. */
static inline int tracefs_positive(struct dentry *dentry)
{
	return dentry->d_inode && !d_unhashed(dentry);
}
503
504static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
505{
506 int ret = 0;
507
508 if (tracefs_positive(dentry)) {
509 if (dentry->d_inode) {
510 dget(dentry);
511 switch (dentry->d_inode->i_mode & S_IFMT) {
512 case S_IFDIR:
513 ret = simple_rmdir(parent->d_inode, dentry);
514 break;
515 default:
516 simple_unlink(parent->d_inode, dentry);
517 break;
518 }
519 if (!ret)
520 d_delete(dentry);
521 dput(dentry);
522 }
523 }
524 return ret;
525}
526
/**
 * tracefs_remove - removes a file or directory from the tracefs filesystem
 * @dentry: a pointer to a the dentry of the file or directory to be
 *          removed.
 *
 * This function removes a file or directory in tracefs that was previously
 * created with a call to another tracefs function (like
 * tracefs_create_file() or variants thereof.)
 *
 * NULL and ERR_PTR dentries are silently ignored, so callers may pass
 * the result of a failed create directly.
 */
void tracefs_remove(struct dentry *dentry)
{
	struct dentry *parent;
	int ret;

	if (IS_ERR_OR_NULL(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || !parent->d_inode)
		return;

	mutex_lock(&parent->d_inode->i_mutex);
	ret = __tracefs_remove(dentry, parent);
	mutex_unlock(&parent->d_inode->i_mutex);
	/* Each successful removal drops one pin taken at creation time. */
	if (!ret)
		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
554
/**
 * tracefs_remove_recursive - recursively removes a directory
 * @dentry: a pointer to a the dentry of the directory to be removed.
 *
 * This function recursively removes a directory tree in tracefs that
 * was previously created with a call to another tracefs function
 * (like tracefs_create_file() or variants thereof.)
 */
void tracefs_remove_recursive(struct dentry *dentry)
{
	struct dentry *child, *parent;

	if (IS_ERR_OR_NULL(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || !parent->d_inode)
		return;

	/*
	 * Iterative depth-first walk: descend via "down", scan siblings
	 * via "loop", and climb back up after a directory is emptied.
	 */
	parent = dentry;
 down:
	mutex_lock(&parent->d_inode->i_mutex);
 loop:
	/*
	 * The parent->d_subdirs is protected by the d_lock. Outside that
	 * lock, the child can be unlinked and set to be freed which can
	 * use the d_u.d_child as the rcu head and corrupt this list.
	 */
	spin_lock(&parent->d_lock);
	list_for_each_entry(child, &parent->d_subdirs, d_child) {
		if (!tracefs_positive(child))
			continue;

		/* perhaps simple_empty(child) makes more sense */
		if (!list_empty(&child->d_subdirs)) {
			/* Non-empty subdirectory: descend into it first. */
			spin_unlock(&parent->d_lock);
			mutex_unlock(&parent->d_inode->i_mutex);
			parent = child;
			goto down;
		}

		spin_unlock(&parent->d_lock);

		if (!__tracefs_remove(child, parent))
			simple_release_fs(&tracefs_mount, &tracefs_mount_count);

		/*
		 * The parent->d_lock protects against child from unlinking
		 * from d_subdirs. When releasing the parent->d_lock we can
		 * no longer trust that the next pointer is valid.
		 * Restart the loop. We'll skip this one with the
		 * tracefs_positive() check.
		 */
		goto loop;
	}
	spin_unlock(&parent->d_lock);

	/* Directory emptied: go up one level and continue scanning there. */
	mutex_unlock(&parent->d_inode->i_mutex);
	child = parent;
	parent = parent->d_parent;
	mutex_lock(&parent->d_inode->i_mutex);

	if (child != dentry)
		/* go up */
		goto loop;

	/* Finally remove the root of the requested subtree itself. */
	if (!__tracefs_remove(child, parent))
		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
	mutex_unlock(&parent->d_inode->i_mutex);
}
625
/**
 * tracefs_initialized - Tells whether tracefs has been registered
 *
 * Returns true once tracefs_init() has successfully registered the
 * filesystem type.
 */
bool tracefs_initialized(void)
{
	return tracefs_registered;
}
633
634static struct kobject *trace_kobj;
635
636static int __init tracefs_init(void)
637{
638 int retval;
639
640 trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
641 if (!trace_kobj)
642 return -EINVAL;
643
644 retval = register_filesystem(&trace_fs_type);
645 if (!retval)
646 tracefs_registered = true;
647
648 return retval;
649}
650core_initcall(tracefs_init);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0acf626..c3d15fe83403 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
50 */ 50 */
51 51
52#include "ubifs.h" 52#include "ubifs.h"
53#include <linux/aio.h>
54#include <linux/mount.h> 53#include <linux/mount.h>
55#include <linux/namei.h> 54#include <linux/namei.h>
56#include <linux/slab.h> 55#include <linux/slab.h>
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555fbeac..7f885cc8b0b7 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,7 +34,7 @@
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/buffer_head.h> 36#include <linux/buffer_head.h>
37#include <linux/aio.h> 37#include <linux/uio.h>
38 38
39#include "udf_i.h" 39#include "udf_i.h"
40#include "udf_sb.h" 40#include "udf_sb.h"
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
122 struct file *file = iocb->ki_filp; 122 struct file *file = iocb->ki_filp;
123 struct inode *inode = file_inode(file); 123 struct inode *inode = file_inode(file);
124 int err, pos; 124 int err, pos;
125 size_t count = iocb->ki_nbytes; 125 size_t count = iov_iter_count(from);
126 struct udf_inode_info *iinfo = UDF_I(inode); 126 struct udf_inode_info *iinfo = UDF_I(inode);
127 127
128 mutex_lock(&inode->i_mutex); 128 mutex_lock(&inode->i_mutex);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d599098d..9c1fbd23913d 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,7 +38,7 @@
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 39#include <linux/crc-itu-t.h>
40#include <linux/mpage.h> 40#include <linux/mpage.h>
41#include <linux/aio.h> 41#include <linux/uio.h>
42 42
43#include "udf_i.h" 43#include "udf_i.h"
44#include "udf_sb.h" 44#include "udf_sb.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..4f8cdc59bc38 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
31#include "xfs_bmap.h" 31#include "xfs_bmap.h"
32#include "xfs_bmap_util.h" 32#include "xfs_bmap_util.h"
33#include "xfs_bmap_btree.h" 33#include "xfs_bmap_btree.h"
34#include <linux/aio.h>
35#include <linux/gfp.h> 34#include <linux/gfp.h>
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/pagevec.h> 36#include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a2e1cb8a568b..f44212fae653 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
38#include "xfs_icache.h" 38#include "xfs_icache.h"
39#include "xfs_pnfs.h" 39#include "xfs_pnfs.h"
40 40
41#include <linux/aio.h>
42#include <linux/dcache.h> 41#include <linux/dcache.h>
43#include <linux/falloc.h> 42#include <linux/falloc.h>
44#include <linux/pagevec.h> 43#include <linux/pagevec.h>