aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c192
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/ecryptfs/file.c7
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/indirect.c2
-rw-r--r--fs/ext4/inode.c1
-rw-r--r--fs/ext4/page-io.c1
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/fat/inode.c1
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/file.c55
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c102
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/quota.c62
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/namei.c168
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c784
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/open.c5
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/read_write.c117
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/splice.c28
-rw-r--r--fs/stat.c2
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/udf/file.c4
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/xfs/xfs_aops.c1
-rw-r--r--fs/xfs/xfs_file.c1
66 files changed, 1446 insertions, 933 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..ff1a5bac4200 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/aio.h> 36#include <linux/uio.h>
37#include <net/9p/9p.h> 37#include <net/9p/9p.h>
38#include <net/9p/client.h> 38#include <net/9p/client.h>
39 39
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
118obj-$(CONFIG_HPPFS) += hppfs/ 118obj-$(CONFIG_HPPFS) += hppfs/
119obj-$(CONFIG_CACHEFILES) += cachefiles/ 119obj-$(CONFIG_CACHEFILES) += cachefiles/
120obj-$(CONFIG_DEBUG_FS) += debugfs/ 120obj-$(CONFIG_DEBUG_FS) += debugfs/
121obj-$(CONFIG_TRACING) += tracefs/
121obj-$(CONFIG_OCFS2_FS) += ocfs2/ 122obj-$(CONFIG_OCFS2_FS) += ocfs2/
122obj-$(CONFIG_BTRFS_FS) += btrfs/ 123obj-$(CONFIG_BTRFS_FS) += btrfs/
123obj-$(CONFIG_GFS2_FS) += gfs2/ 124obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a91795e01a7f..3aa7eb66547e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
12 * affs regular file handling primitives 12 * affs regular file handling primitives
13 */ 13 */
14 14
15#include <linux/aio.h> 15#include <linux/uio.h>
16#include "affs.h" 16#include "affs.h"
17 17
18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); 18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/aio.h>
18#include "internal.h" 17#include "internal.h"
19 18
20static int afs_write_back_from_locked_page(struct afs_writeback *wb, 19static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index a793f7023755..1ab60010cf6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@ struct kioctx {
151 unsigned id; 151 unsigned id;
152}; 152};
153 153
154/*
155 * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
156 * cancelled or completed (this makes a certain amount of sense because
157 * successful cancellation - io_cancel() - does deliver the completion to
158 * userspace).
159 *
160 * And since most things don't implement kiocb cancellation and we'd really like
161 * kiocb completion to be lockless when possible, we use ki_cancel to
162 * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
163 * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
164 */
165#define KIOCB_CANCELLED ((void *) (~0ULL))
166
167struct aio_kiocb {
168 struct kiocb common;
169
170 struct kioctx *ki_ctx;
171 kiocb_cancel_fn *ki_cancel;
172
173 struct iocb __user *ki_user_iocb; /* user's aiocb */
174 __u64 ki_user_data; /* user's data for completion */
175
176 struct list_head ki_list; /* the aio core uses this
177 * for cancellation */
178
179 /*
180 * If the aio_resfd field of the userspace iocb is not zero,
181 * this is the underlying eventfd context to deliver events to.
182 */
183 struct eventfd_ctx *ki_eventfd;
184};
185
154/*------ sysctl variables----*/ 186/*------ sysctl variables----*/
155static DEFINE_SPINLOCK(aio_nr_lock); 187static DEFINE_SPINLOCK(aio_nr_lock);
156unsigned long aio_nr; /* current system wide number of aio requests */ 188unsigned long aio_nr; /* current system wide number of aio requests */
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
220 if (IS_ERR(aio_mnt)) 252 if (IS_ERR(aio_mnt))
221 panic("Failed to create aio fs mount."); 253 panic("Failed to create aio fs mount.");
222 254
223 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 255 kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
224 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 256 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
225 257
226 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); 258 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -484,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx)
484#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) 516#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
485#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) 517#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
486 518
487void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) 519void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
488{ 520{
521 struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
489 struct kioctx *ctx = req->ki_ctx; 522 struct kioctx *ctx = req->ki_ctx;
490 unsigned long flags; 523 unsigned long flags;
491 524
@@ -500,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
500} 533}
501EXPORT_SYMBOL(kiocb_set_cancel_fn); 534EXPORT_SYMBOL(kiocb_set_cancel_fn);
502 535
503static int kiocb_cancel(struct kiocb *kiocb) 536static int kiocb_cancel(struct aio_kiocb *kiocb)
504{ 537{
505 kiocb_cancel_fn *old, *cancel; 538 kiocb_cancel_fn *old, *cancel;
506 539
@@ -518,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
518 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); 551 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
519 } while (cancel != old); 552 } while (cancel != old);
520 553
521 return cancel(kiocb); 554 return cancel(&kiocb->common);
522} 555}
523 556
524static void free_ioctx(struct work_struct *work) 557static void free_ioctx(struct work_struct *work)
@@ -554,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
554static void free_ioctx_users(struct percpu_ref *ref) 587static void free_ioctx_users(struct percpu_ref *ref)
555{ 588{
556 struct kioctx *ctx = container_of(ref, struct kioctx, users); 589 struct kioctx *ctx = container_of(ref, struct kioctx, users);
557 struct kiocb *req; 590 struct aio_kiocb *req;
558 591
559 spin_lock_irq(&ctx->ctx_lock); 592 spin_lock_irq(&ctx->ctx_lock);
560 593
561 while (!list_empty(&ctx->active_reqs)) { 594 while (!list_empty(&ctx->active_reqs)) {
562 req = list_first_entry(&ctx->active_reqs, 595 req = list_first_entry(&ctx->active_reqs,
563 struct kiocb, ki_list); 596 struct aio_kiocb, ki_list);
564 597
565 list_del_init(&req->ki_list); 598 list_del_init(&req->ki_list);
566 kiocb_cancel(req); 599 kiocb_cancel(req);
@@ -786,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
786 return 0; 819 return 0;
787} 820}
788 821
789/* wait_on_sync_kiocb:
790 * Waits on the given sync kiocb to complete.
791 */
792ssize_t wait_on_sync_kiocb(struct kiocb *req)
793{
794 while (!req->ki_ctx) {
795 set_current_state(TASK_UNINTERRUPTIBLE);
796 if (req->ki_ctx)
797 break;
798 io_schedule();
799 }
800 __set_current_state(TASK_RUNNING);
801 return req->ki_user_data;
802}
803EXPORT_SYMBOL(wait_on_sync_kiocb);
804
805/* 822/*
806 * exit_aio: called when the last user of mm goes away. At this point, there is 823 * exit_aio: called when the last user of mm goes away. At this point, there is
807 * no way for any new requests to be submited or any of the io_* syscalls to be 824 * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -956,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
956 * Allocate a slot for an aio request. 973 * Allocate a slot for an aio request.
957 * Returns NULL if no requests are free. 974 * Returns NULL if no requests are free.
958 */ 975 */
959static inline struct kiocb *aio_get_req(struct kioctx *ctx) 976static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
960{ 977{
961 struct kiocb *req; 978 struct aio_kiocb *req;
962 979
963 if (!get_reqs_available(ctx)) { 980 if (!get_reqs_available(ctx)) {
964 user_refill_reqs_available(ctx); 981 user_refill_reqs_available(ctx);
@@ -979,10 +996,10 @@ out_put:
979 return NULL; 996 return NULL;
980} 997}
981 998
982static void kiocb_free(struct kiocb *req) 999static void kiocb_free(struct aio_kiocb *req)
983{ 1000{
984 if (req->ki_filp) 1001 if (req->common.ki_filp)
985 fput(req->ki_filp); 1002 fput(req->common.ki_filp);
986 if (req->ki_eventfd != NULL) 1003 if (req->ki_eventfd != NULL)
987 eventfd_ctx_put(req->ki_eventfd); 1004 eventfd_ctx_put(req->ki_eventfd);
988 kmem_cache_free(kiocb_cachep, req); 1005 kmem_cache_free(kiocb_cachep, req);
@@ -1018,8 +1035,9 @@ out:
1018/* aio_complete 1035/* aio_complete
1019 * Called when the io request on the given iocb is complete. 1036 * Called when the io request on the given iocb is complete.
1020 */ 1037 */
1021void aio_complete(struct kiocb *iocb, long res, long res2) 1038static void aio_complete(struct kiocb *kiocb, long res, long res2)
1022{ 1039{
1040 struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
1023 struct kioctx *ctx = iocb->ki_ctx; 1041 struct kioctx *ctx = iocb->ki_ctx;
1024 struct aio_ring *ring; 1042 struct aio_ring *ring;
1025 struct io_event *ev_page, *event; 1043 struct io_event *ev_page, *event;
@@ -1033,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1033 * ref, no other paths have a way to get another ref 1051 * ref, no other paths have a way to get another ref
1034 * - the sync task helpfully left a reference to itself in the iocb 1052 * - the sync task helpfully left a reference to itself in the iocb
1035 */ 1053 */
1036 if (is_sync_kiocb(iocb)) { 1054 BUG_ON(is_sync_kiocb(kiocb));
1037 iocb->ki_user_data = res;
1038 smp_wmb();
1039 iocb->ki_ctx = ERR_PTR(-EXDEV);
1040 wake_up_process(iocb->ki_obj.tsk);
1041 return;
1042 }
1043 1055
1044 if (iocb->ki_list.next) { 1056 if (iocb->ki_list.next) {
1045 unsigned long flags; 1057 unsigned long flags;
@@ -1065,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1065 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1077 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1066 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 1078 event = ev_page + pos % AIO_EVENTS_PER_PAGE;
1067 1079
1068 event->obj = (u64)(unsigned long)iocb->ki_obj.user; 1080 event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
1069 event->data = iocb->ki_user_data; 1081 event->data = iocb->ki_user_data;
1070 event->res = res; 1082 event->res = res;
1071 event->res2 = res2; 1083 event->res2 = res2;
@@ -1074,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1074 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1086 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1075 1087
1076 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", 1088 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
1077 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, 1089 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
1078 res, res2); 1090 res, res2);
1079 1091
1080 /* after flagging the request as done, we 1092 /* after flagging the request as done, we
@@ -1121,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1121 1133
1122 percpu_ref_put(&ctx->reqs); 1134 percpu_ref_put(&ctx->reqs);
1123} 1135}
1124EXPORT_SYMBOL(aio_complete);
1125 1136
1126/* aio_read_events_ring 1137/* aio_read_events_ring
1127 * Pull an event off of the ioctx's event ring. Returns the number of 1138 * Pull an event off of the ioctx's event ring. Returns the number of
@@ -1349,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
1349 unsigned long, loff_t); 1360 unsigned long, loff_t);
1350typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); 1361typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
1351 1362
1352static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, 1363static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
1353 int rw, char __user *buf, 1364 struct iovec **iovec,
1354 unsigned long *nr_segs, 1365 bool compat,
1355 struct iovec **iovec, 1366 struct iov_iter *iter)
1356 bool compat)
1357{ 1367{
1358 ssize_t ret;
1359
1360 *nr_segs = kiocb->ki_nbytes;
1361
1362#ifdef CONFIG_COMPAT 1368#ifdef CONFIG_COMPAT
1363 if (compat) 1369 if (compat)
1364 ret = compat_rw_copy_check_uvector(rw, 1370 return compat_import_iovec(rw,
1365 (struct compat_iovec __user *)buf, 1371 (struct compat_iovec __user *)buf,
1366 *nr_segs, UIO_FASTIOV, *iovec, iovec); 1372 len, UIO_FASTIOV, iovec, iter);
1367 else
1368#endif 1373#endif
1369 ret = rw_copy_check_uvector(rw, 1374 return import_iovec(rw, (struct iovec __user *)buf,
1370 (struct iovec __user *)buf, 1375 len, UIO_FASTIOV, iovec, iter);
1371 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1372 if (ret < 0)
1373 return ret;
1374
1375 /* ki_nbytes now reflect bytes instead of segs */
1376 kiocb->ki_nbytes = ret;
1377 return 0;
1378}
1379
1380static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1381 int rw, char __user *buf,
1382 unsigned long *nr_segs,
1383 struct iovec *iovec)
1384{
1385 if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
1386 return -EFAULT;
1387
1388 iovec->iov_base = buf;
1389 iovec->iov_len = kiocb->ki_nbytes;
1390 *nr_segs = 1;
1391 return 0;
1392} 1376}
1393 1377
1394/* 1378/*
@@ -1396,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1396 * Performs the initial checks and io submission. 1380 * Performs the initial checks and io submission.
1397 */ 1381 */
1398static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, 1382static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1399 char __user *buf, bool compat) 1383 char __user *buf, size_t len, bool compat)
1400{ 1384{
1401 struct file *file = req->ki_filp; 1385 struct file *file = req->ki_filp;
1402 ssize_t ret; 1386 ssize_t ret;
1403 unsigned long nr_segs;
1404 int rw; 1387 int rw;
1405 fmode_t mode; 1388 fmode_t mode;
1406 aio_rw_op *rw_op; 1389 aio_rw_op *rw_op;
@@ -1431,21 +1414,22 @@ rw_common:
1431 if (!rw_op && !iter_op) 1414 if (!rw_op && !iter_op)
1432 return -EINVAL; 1415 return -EINVAL;
1433 1416
1434 ret = (opcode == IOCB_CMD_PREADV || 1417 if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
1435 opcode == IOCB_CMD_PWRITEV) 1418 ret = aio_setup_vectored_rw(rw, buf, len,
1436 ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, 1419 &iovec, compat, &iter);
1437 &iovec, compat) 1420 else {
1438 : aio_setup_single_vector(req, rw, buf, &nr_segs, 1421 ret = import_single_range(rw, buf, len, iovec, &iter);
1439 iovec); 1422 iovec = NULL;
1423 }
1440 if (!ret) 1424 if (!ret)
1441 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1425 ret = rw_verify_area(rw, file, &req->ki_pos,
1426 iov_iter_count(&iter));
1442 if (ret < 0) { 1427 if (ret < 0) {
1443 if (iovec != inline_vecs) 1428 kfree(iovec);
1444 kfree(iovec);
1445 return ret; 1429 return ret;
1446 } 1430 }
1447 1431
1448 req->ki_nbytes = ret; 1432 len = ret;
1449 1433
1450 /* XXX: move/kill - rw_verify_area()? */ 1434 /* XXX: move/kill - rw_verify_area()? */
1451 /* This matches the pread()/pwrite() logic */ 1435 /* This matches the pread()/pwrite() logic */
@@ -1458,14 +1442,14 @@ rw_common:
1458 file_start_write(file); 1442 file_start_write(file);
1459 1443
1460 if (iter_op) { 1444 if (iter_op) {
1461 iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
1462 ret = iter_op(req, &iter); 1445 ret = iter_op(req, &iter);
1463 } else { 1446 } else {
1464 ret = rw_op(req, iovec, nr_segs, req->ki_pos); 1447 ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
1465 } 1448 }
1466 1449
1467 if (rw == WRITE) 1450 if (rw == WRITE)
1468 file_end_write(file); 1451 file_end_write(file);
1452 kfree(iovec);
1469 break; 1453 break;
1470 1454
1471 case IOCB_CMD_FDSYNC: 1455 case IOCB_CMD_FDSYNC:
@@ -1487,9 +1471,6 @@ rw_common:
1487 return -EINVAL; 1471 return -EINVAL;
1488 } 1472 }
1489 1473
1490 if (iovec != inline_vecs)
1491 kfree(iovec);
1492
1493 if (ret != -EIOCBQUEUED) { 1474 if (ret != -EIOCBQUEUED) {
1494 /* 1475 /*
1495 * There's no easy way to restart the syscall since other AIO's 1476 * There's no easy way to restart the syscall since other AIO's
@@ -1508,7 +1489,7 @@ rw_common:
1508static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1489static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1509 struct iocb *iocb, bool compat) 1490 struct iocb *iocb, bool compat)
1510{ 1491{
1511 struct kiocb *req; 1492 struct aio_kiocb *req;
1512 ssize_t ret; 1493 ssize_t ret;
1513 1494
1514 /* enforce forwards compatibility on users */ 1495 /* enforce forwards compatibility on users */
@@ -1531,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1531 if (unlikely(!req)) 1512 if (unlikely(!req))
1532 return -EAGAIN; 1513 return -EAGAIN;
1533 1514
1534 req->ki_filp = fget(iocb->aio_fildes); 1515 req->common.ki_filp = fget(iocb->aio_fildes);
1535 if (unlikely(!req->ki_filp)) { 1516 if (unlikely(!req->common.ki_filp)) {
1536 ret = -EBADF; 1517 ret = -EBADF;
1537 goto out_put_req; 1518 goto out_put_req;
1538 } 1519 }
1520 req->common.ki_pos = iocb->aio_offset;
1521 req->common.ki_complete = aio_complete;
1522 req->common.ki_flags = 0;
1539 1523
1540 if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1524 if (iocb->aio_flags & IOCB_FLAG_RESFD) {
1541 /* 1525 /*
@@ -1550,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1550 req->ki_eventfd = NULL; 1534 req->ki_eventfd = NULL;
1551 goto out_put_req; 1535 goto out_put_req;
1552 } 1536 }
1537
1538 req->common.ki_flags |= IOCB_EVENTFD;
1553 } 1539 }
1554 1540
1555 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); 1541 ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1558,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1558 goto out_put_req; 1544 goto out_put_req;
1559 } 1545 }
1560 1546
1561 req->ki_obj.user = user_iocb; 1547 req->ki_user_iocb = user_iocb;
1562 req->ki_user_data = iocb->aio_data; 1548 req->ki_user_data = iocb->aio_data;
1563 req->ki_pos = iocb->aio_offset;
1564 req->ki_nbytes = iocb->aio_nbytes;
1565 1549
1566 ret = aio_run_iocb(req, iocb->aio_lio_opcode, 1550 ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
1567 (char __user *)(unsigned long)iocb->aio_buf, 1551 (char __user *)(unsigned long)iocb->aio_buf,
1552 iocb->aio_nbytes,
1568 compat); 1553 compat);
1569 if (ret) 1554 if (ret)
1570 goto out_put_req; 1555 goto out_put_req;
@@ -1651,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1651/* lookup_kiocb 1636/* lookup_kiocb
1652 * Finds a given iocb for cancellation. 1637 * Finds a given iocb for cancellation.
1653 */ 1638 */
1654static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, 1639static struct aio_kiocb *
1655 u32 key) 1640lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
1656{ 1641{
1657 struct list_head *pos; 1642 struct aio_kiocb *kiocb;
1658 1643
1659 assert_spin_locked(&ctx->ctx_lock); 1644 assert_spin_locked(&ctx->ctx_lock);
1660 1645
@@ -1662,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
1662 return NULL; 1647 return NULL;
1663 1648
1664 /* TODO: use a hash or array, this sucks. */ 1649 /* TODO: use a hash or array, this sucks. */
1665 list_for_each(pos, &ctx->active_reqs) { 1650 list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
1666 struct kiocb *kiocb = list_kiocb(pos); 1651 if (kiocb->ki_user_iocb == iocb)
1667 if (kiocb->ki_obj.user == iocb)
1668 return kiocb; 1652 return kiocb;
1669 } 1653 }
1670 return NULL; 1654 return NULL;
@@ -1684,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1684 struct io_event __user *, result) 1668 struct io_event __user *, result)
1685{ 1669{
1686 struct kioctx *ctx; 1670 struct kioctx *ctx;
1687 struct kiocb *kiocb; 1671 struct aio_kiocb *kiocb;
1688 u32 key; 1672 u32 key;
1689 int ret; 1673 int ret;
1690 1674
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/vfs.h> 16#include <linux/vfs.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/uio.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "bfs.h" 20#include "bfs.h"
20 21
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..2e522aed6584 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/cleancache.h> 29#include <linux/cleancache.h>
30#include <linux/aio.h>
31#include <asm/uaccess.h> 30#include <asm/uaccess.h>
32#include "internal.h" 31#include "internal.h"
33 32
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 30982bbd31c3..aee18f84e315 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/aio.h>
28#include <linux/falloc.h> 27#include <linux/falloc.h>
29#include <linux/swap.h> 28#include <linux/swap.h>
30#include <linux/writeback.h> 29#include <linux/writeback.h>
@@ -32,6 +31,7 @@
32#include <linux/compat.h> 31#include <linux/compat.h>
33#include <linux/slab.h> 32#include <linux/slab.h>
34#include <linux/btrfs.h> 33#include <linux/btrfs.h>
34#include <linux/uio.h>
35#include "ctree.h" 35#include "ctree.h"
36#include "disk-io.h" 36#include "disk-io.h"
37#include "transaction.h" 37#include "transaction.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2e732d7af52..686331f22b15 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/statfs.h> 33#include <linux/statfs.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include <linux/bit_spinlock.h> 35#include <linux/bit_spinlock.h>
37#include <linux/xattr.h> 36#include <linux/xattr.h>
38#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
43#include <linux/btrfs.h> 42#include <linux/btrfs.h>
44#include <linux/blkdev.h> 43#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h> 44#include <linux/posix_acl_xattr.h>
45#include <linux/uio.h>
46#include "ctree.h" 46#include "ctree.h"
47#include "disk-io.h" 47#include "disk-io.h"
48#include "transaction.h" 48#include "transaction.h"
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075a823d..139f2fea91a0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include <linux/namei.h> 8#include <linux/namei.h>
9#include <linux/writeback.h> 9#include <linux/writeback.h>
10#include <linux/aio.h>
11#include <linux/falloc.h> 10#include <linux/falloc.h>
12 11
13#include "super.h" 12#include "super.h"
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
808{ 807{
809 struct file *filp = iocb->ki_filp; 808 struct file *filp = iocb->ki_filp;
810 struct ceph_file_info *fi = filp->private_data; 809 struct ceph_file_info *fi = filp->private_data;
811 size_t len = iocb->ki_nbytes; 810 size_t len = iov_iter_count(to);
812 struct inode *inode = file_inode(filp); 811 struct inode *inode = file_inode(filp);
813 struct ceph_inode_info *ci = ceph_inode(inode); 812 struct ceph_inode_info *ci = ceph_inode(inode);
814 struct page *pinned_page = NULL; 813 struct page *pinned_page = NULL;
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e3732e53b..d99736a63e3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode,
2690 struct dentry *dentry, struct dentry *alias) 2690 struct dentry *dentry, struct dentry *alias)
2691{ 2691{
2692 struct mutex *m1 = NULL, *m2 = NULL; 2692 struct mutex *m1 = NULL, *m2 = NULL;
2693 int ret = -EBUSY; 2693 int ret = -ESTALE;
2694 2694
2695 /* If alias and dentry share a parent, then no extra locks required */ 2695 /* If alias and dentry share a parent, then no extra locks required */
2696 if (alias->d_parent == dentry->d_parent) 2696 if (alias->d_parent == dentry->d_parent)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b2e297..6fb00e3f1059 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/atomic.h> 38#include <linux/atomic.h>
39#include <linux/prefetch.h> 39#include <linux/prefetch.h>
40#include <linux/aio.h>
41 40
42/* 41/*
43 * How many user pages to map in one call to get_user_pages(). This determines 42 * How many user pages to map in one call to get_user_pages(). This determines
@@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
265 ret = err; 264 ret = err;
266 } 265 }
267 266
268 aio_complete(dio->iocb, ret, 0); 267 dio->iocb->ki_complete(dio->iocb, ret, 0);
269 } 268 }
270 269
271 kmem_cache_free(dio_cache, dio); 270 kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
1056 * operation. AIO can if it was a broken operation described above or 1055 * operation. AIO can if it was a broken operation described above or
1057 * in fact if all the bios race to complete before we get here. In 1056 * in fact if all the bios race to complete before we get here. In
1058 * that case dio_complete() translates the EIOCBQUEUED into the proper 1057 * that case dio_complete() translates the EIOCBQUEUED into the proper
1059 * return code that the caller will hand to aio_complete(). 1058 * return code that the caller will hand to ->complete().
1060 * 1059 *
1061 * This is managed by the bio_lock instead of being an atomic_t so that 1060 * This is managed by the bio_lock instead of being an atomic_t so that
1062 * completion paths can drop their ref and use the remaining count to 1061 * completion paths can drop their ref and use the remaining count to
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index fd39bad6f1bd..79675089443d 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/compat.h> 32#include <linux/compat.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <linux/aio.h>
35#include "ecryptfs_kernel.h" 34#include "ecryptfs_kernel.h"
36 35
37/** 36/**
@@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
52 struct file *file = iocb->ki_filp; 51 struct file *file = iocb->ki_filp;
53 52
54 rc = generic_file_read_iter(iocb, to); 53 rc = generic_file_read_iter(iocb, to);
55 /*
56 * Even though this is a async interface, we need to wait
57 * for IO to finish to update atime
58 */
59 if (-EIOCBQUEUED == rc)
60 rc = wait_on_sync_kiocb(iocb);
61 if (rc >= 0) { 54 if (rc >= 0) {
62 path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); 55 path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
63 touch_atime(path); 56 touch_atime(path);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc000125..df9d6afbc5d5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
31#include <linux/mpage.h> 31#include <linux/mpage.h>
32#include <linux/fiemap.h> 32#include <linux/fiemap.h>
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/aio.h> 34#include <linux/uio.h>
35#include "ext2.h" 35#include "ext2.h"
36#include "acl.h" 36#include "acl.h"
37#include "xattr.h" 37#include "xattr.h"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc49ba27..db07ffbe7c85 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/mpage.h> 28#include <linux/mpage.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/aio.h> 30#include <linux/uio.h>
31#include "ext3.h" 31#include "ext3.h"
32#include "xattr.h" 32#include "xattr.h"
33#include "acl.h" 33#include "acl.h"
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da16c9c..598abbbe6786 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,9 +23,9 @@
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/path.h> 25#include <linux/path.h>
26#include <linux/aio.h>
27#include <linux/quotaops.h> 26#include <linux/quotaops.h>
28#include <linux/pagevec.h> 27#include <linux/pagevec.h>
28#include <linux/uio.h>
29#include "ext4.h" 29#include "ext4.h"
30#include "ext4_jbd2.h" 30#include "ext4_jbd2.h"
31#include "xattr.h" 31#include "xattr.h"
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 45fe924f82bc..740c7871c117 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,9 +20,9 @@
20 * (sct@redhat.com), 1993, 1998 20 * (sct@redhat.com), 1993, 1998
21 */ 21 */
22 22
23#include <linux/aio.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "truncate.h" 24#include "truncate.h"
25#include <linux/uio.h>
26 26
27#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
28 28
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cb9a212b86f..a3f451370bef 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,7 +37,6 @@
37#include <linux/printk.h> 37#include <linux/printk.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include <linux/aio.h>
41#include <linux/bitops.h> 40#include <linux/bitops.h>
42 41
43#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a2541a9ba..464984261e69 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -18,7 +18,6 @@
18#include <linux/pagevec.h> 18#include <linux/pagevec.h>
19#include <linux/mpage.h> 19#include <linux/mpage.h>
20#include <linux/namei.h> 20#include <linux/namei.h>
21#include <linux/aio.h>
22#include <linux/uio.h> 21#include <linux/uio.h>
23#include <linux/bio.h> 22#include <linux/bio.h>
24#include <linux/workqueue.h> 23#include <linux/workqueue.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed023a750..497f8515d205 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/mpage.h> 14#include <linux/mpage.h>
15#include <linux/aio.h>
16#include <linux/writeback.h> 15#include <linux/writeback.h>
17#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
18#include <linux/blkdev.h> 17#include <linux/blkdev.h>
19#include <linux/bio.h> 18#include <linux/bio.h>
20#include <linux/prefetch.h> 19#include <linux/prefetch.h>
20#include <linux/uio.h>
21 21
22#include "f2fs.h" 22#include "f2fs.h"
23#include "node.h" 23#include "node.h"
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5263c7..8521207de229 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -19,7 +19,6 @@
19#include <linux/mpage.h> 19#include <linux/mpage.h>
20#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/aio.h>
23#include <linux/vfs.h> 22#include <linux/vfs.h>
24#include <linux/parser.h> 23#include <linux/parser.h>
25#include <linux/uio.h> 24#include <linux/uio.h>
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7abba1c..b3fa05032234 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
38#include <linux/device.h> 38#include <linux/device.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#include <linux/fs.h> 40#include <linux/fs.h>
41#include <linux/aio.h>
42#include <linux/kdev_t.h> 41#include <linux/kdev_t.h>
43#include <linux/kthread.h> 42#include <linux/kthread.h>
44#include <linux/list.h> 43#include <linux/list.h>
@@ -48,6 +47,7 @@
48#include <linux/slab.h> 47#include <linux/slab.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/uio.h>
51 51
52#include "fuse_i.h" 52#include "fuse_i.h"
53 53
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 39706c57ad3c..95a2797eef66 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
19#include <linux/pipe_fs_i.h> 19#include <linux/pipe_fs_i.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/splice.h> 21#include <linux/splice.h>
22#include <linux/aio.h>
23 22
24MODULE_ALIAS_MISCDEV(FUSE_MINOR); 23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
25MODULE_ALIAS("devname:fuse"); 24MODULE_ALIAS("devname:fuse");
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3bdcfd8..ff102cbf16ea 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/aio.h>
19#include <linux/falloc.h> 18#include <linux/falloc.h>
19#include <linux/uio.h>
20 20
21static const struct file_operations fuse_direct_io_file_operations; 21static const struct file_operations fuse_direct_io_file_operations;
22 22
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
528 } 528 }
529} 529}
530 530
531static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
532{
533 if (io->err)
534 return io->err;
535
536 if (io->bytes >= 0 && io->write)
537 return -EIO;
538
539 return io->bytes < 0 ? io->size : io->bytes;
540}
541
531/** 542/**
532 * In case of short read, the caller sets 'pos' to the position of 543 * In case of short read, the caller sets 'pos' to the position of
533 * actual end of fuse request in IO request. Otherwise, if bytes_requested 544 * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
546 */ 557 */
547static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) 558static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
548{ 559{
560 bool is_sync = is_sync_kiocb(io->iocb);
549 int left; 561 int left;
550 562
551 spin_lock(&io->lock); 563 spin_lock(&io->lock);
@@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
555 io->bytes = pos; 567 io->bytes = pos;
556 568
557 left = --io->reqs; 569 left = --io->reqs;
570 if (!left && is_sync)
571 complete(io->done);
558 spin_unlock(&io->lock); 572 spin_unlock(&io->lock);
559 573
560 if (!left) { 574 if (!left && !is_sync) {
561 long res; 575 ssize_t res = fuse_get_res_by_io(io);
562 576
563 if (io->err) 577 if (res >= 0) {
564 res = io->err; 578 struct inode *inode = file_inode(io->iocb->ki_filp);
565 else if (io->bytes >= 0 && io->write) 579 struct fuse_conn *fc = get_fuse_conn(inode);
566 res = -EIO; 580 struct fuse_inode *fi = get_fuse_inode(inode);
567 else {
568 res = io->bytes < 0 ? io->size : io->bytes;
569 581
570 if (!is_sync_kiocb(io->iocb)) { 582 spin_lock(&fc->lock);
571 struct inode *inode = file_inode(io->iocb->ki_filp); 583 fi->attr_version = ++fc->attr_version;
572 struct fuse_conn *fc = get_fuse_conn(inode); 584 spin_unlock(&fc->lock);
573 struct fuse_inode *fi = get_fuse_inode(inode);
574
575 spin_lock(&fc->lock);
576 fi->attr_version = ++fc->attr_version;
577 spin_unlock(&fc->lock);
578 }
579 } 585 }
580 586
581 aio_complete(io->iocb, res, 0); 587 io->iocb->ki_complete(io->iocb, res, 0);
582 kfree(io); 588 kfree(io);
583 } 589 }
584} 590}
@@ -2801,6 +2807,7 @@ static ssize_t
2801fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 2807fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2802 loff_t offset) 2808 loff_t offset)
2803{ 2809{
2810 DECLARE_COMPLETION_ONSTACK(wait);
2804 ssize_t ret = 0; 2811 ssize_t ret = 0;
2805 struct file *file = iocb->ki_filp; 2812 struct file *file = iocb->ki_filp;
2806 struct fuse_file *ff = file->private_data; 2813 struct fuse_file *ff = file->private_data;
@@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2852 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) 2859 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
2853 io->async = false; 2860 io->async = false;
2854 2861
2862 if (io->async && is_sync_kiocb(iocb))
2863 io->done = &wait;
2864
2855 if (rw == WRITE) 2865 if (rw == WRITE)
2856 ret = __fuse_direct_write(io, iter, &pos); 2866 ret = __fuse_direct_write(io, iter, &pos);
2857 else 2867 else
@@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2864 if (!is_sync_kiocb(iocb)) 2874 if (!is_sync_kiocb(iocb))
2865 return -EIOCBQUEUED; 2875 return -EIOCBQUEUED;
2866 2876
2867 ret = wait_on_sync_kiocb(iocb); 2877 wait_for_completion(&wait);
2868 } else { 2878 ret = fuse_get_res_by_io(io);
2869 kfree(io);
2870 } 2879 }
2871 2880
2881 kfree(io);
2882
2872 if (rw == WRITE) { 2883 if (rw == WRITE) {
2873 if (ret > 0) 2884 if (ret > 0)
2874 fuse_write_update_size(inode, pos); 2885 fuse_write_update_size(inode, pos);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07c1376..7354dc142a50 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@ struct fuse_io_priv {
263 int err; 263 int err;
264 struct kiocb *iocb; 264 struct kiocb *iocb;
265 struct file *file; 265 struct file *file;
266 struct completion *done;
266}; 267};
267 268
268/** 269/**
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); 110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
111 if (error) 111 if (error)
112 goto out; 112 goto out;
113 113 set_cached_acl(inode, type, acl);
114 if (acl)
115 set_cached_acl(inode, type, acl);
116 else
117 forget_cached_acl(inode, type);
118out: 114out:
119 kfree(data); 115 kfree(data);
120 return error; 116 return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94edebe..a6e6990aea39 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/aio.h> 23#include <linux/uio.h>
24#include <trace/events/writeback.h> 24#include <trace/events/writeback.h>
25 25
26#include "gfs2.h" 26#include "gfs2.h"
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
671 671
672 if (alloc_required) { 672 if (alloc_required) {
673 struct gfs2_alloc_parms ap = { .aflags = 0, }; 673 struct gfs2_alloc_parms ap = { .aflags = 0, };
674 error = gfs2_quota_lock_check(ip); 674 requested = data_blocks + ind_blocks;
675 ap.target = requested;
676 error = gfs2_quota_lock_check(ip, &ap);
675 if (error) 677 if (error)
676 goto out_unlock; 678 goto out_unlock;
677 679
678 requested = data_blocks + ind_blocks;
679 ap.target = requested;
680 error = gfs2_inplace_reserve(ip, &ap); 680 error = gfs2_inplace_reserve(ip, &ap);
681 if (error) 681 if (error)
682 goto out_qunlock; 682 goto out_qunlock;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
1224 1224
1225 if (gfs2_is_stuffed(ip) && 1225 if (gfs2_is_stuffed(ip) &&
1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1227 error = gfs2_quota_lock_check(ip); 1227 error = gfs2_quota_lock_check(ip, &ap);
1228 if (error) 1228 if (error)
1229 return error; 1229 return error;
1230 1230
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8e2d7e..8ec43ab5babf 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/dlm.h> 26#include <linux/dlm.h>
27#include <linux/dlm_plock.h> 27#include <linux/dlm_plock.h>
28#include <linux/aio.h>
29#include <linux/delay.h> 28#include <linux/delay.h>
30 29
31#include "gfs2.h" 30#include "gfs2.h"
@@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
429 if (ret) 428 if (ret)
430 goto out_unlock; 429 goto out_unlock;
431 430
432 ret = gfs2_quota_lock_check(ip);
433 if (ret)
434 goto out_unlock;
435 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 431 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
436 ap.target = data_blocks + ind_blocks; 432 ap.target = data_blocks + ind_blocks;
433 ret = gfs2_quota_lock_check(ip, &ap);
434 if (ret)
435 goto out_unlock;
437 ret = gfs2_inplace_reserve(ip, &ap); 436 ret = gfs2_inplace_reserve(ip, &ap);
438 if (ret) 437 if (ret)
439 goto out_quota_unlock; 438 goto out_quota_unlock;
@@ -765,22 +764,30 @@ out:
765 brelse(dibh); 764 brelse(dibh);
766 return error; 765 return error;
767} 766}
768 767/**
769static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, 768 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
770 unsigned int *data_blocks, unsigned int *ind_blocks) 769 * blocks, determine how many bytes can be written.
770 * @ip: The inode in question.
771 * @len: Max cap of bytes. What we return in *len must be <= this.
772 * @data_blocks: Compute and return the number of data blocks needed
773 * @ind_blocks: Compute and return the number of indirect blocks needed
774 * @max_blocks: The total blocks available to work with.
775 *
776 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
777 */
778static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
779 unsigned int *data_blocks, unsigned int *ind_blocks,
780 unsigned int max_blocks)
771{ 781{
782 loff_t max = *len;
772 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 783 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
773 unsigned int max_blocks = ip->i_rgd->rd_free_clone;
774 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); 784 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
775 785
776 for (tmp = max_data; tmp > sdp->sd_diptrs;) { 786 for (tmp = max_data; tmp > sdp->sd_diptrs;) {
777 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); 787 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
778 max_data -= tmp; 788 max_data -= tmp;
779 } 789 }
780 /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, 790
781 so it might end up with fewer data blocks */
782 if (max_data <= *data_blocks)
783 return;
784 *data_blocks = max_data; 791 *data_blocks = max_data;
785 *ind_blocks = max_blocks - max_data; 792 *ind_blocks = max_blocks - max_data;
786 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; 793 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
797 struct gfs2_inode *ip = GFS2_I(inode); 804 struct gfs2_inode *ip = GFS2_I(inode);
798 struct gfs2_alloc_parms ap = { .aflags = 0, }; 805 struct gfs2_alloc_parms ap = { .aflags = 0, };
799 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 806 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
800 loff_t bytes, max_bytes; 807 loff_t bytes, max_bytes, max_blks = UINT_MAX;
801 int error; 808 int error;
802 const loff_t pos = offset; 809 const loff_t pos = offset;
803 const loff_t count = len; 810 const loff_t count = len;
@@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
819 826
820 gfs2_size_hint(file, offset, len); 827 gfs2_size_hint(file, offset, len);
821 828
829 gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
830 ap.min_target = data_blocks + ind_blocks;
831
822 while (len > 0) { 832 while (len > 0) {
823 if (len < bytes) 833 if (len < bytes)
824 bytes = len; 834 bytes = len;
@@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
827 offset += bytes; 837 offset += bytes;
828 continue; 838 continue;
829 } 839 }
830 error = gfs2_quota_lock_check(ip); 840
841 /* We need to determine how many bytes we can actually
842 * fallocate without exceeding quota or going over the
843 * end of the fs. We start off optimistically by assuming
844 * we can write max_bytes */
845 max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
846
847 /* Since max_bytes is most likely a theoretical max, we
848 * calculate a more realistic 'bytes' to serve as a good
849 * starting point for the number of bytes we may be able
850 * to write */
851 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
852 ap.target = data_blocks + ind_blocks;
853
854 error = gfs2_quota_lock_check(ip, &ap);
831 if (error) 855 if (error)
832 return error; 856 return error;
833retry: 857 /* ap.allowed tells us how many blocks quota will allow
834 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 858 * us to write. Check if this reduces max_blks */
859 if (ap.allowed && ap.allowed < max_blks)
860 max_blks = ap.allowed;
835 861
836 ap.target = data_blocks + ind_blocks;
837 error = gfs2_inplace_reserve(ip, &ap); 862 error = gfs2_inplace_reserve(ip, &ap);
838 if (error) { 863 if (error)
839 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
840 bytes >>= 1;
841 bytes &= bsize_mask;
842 if (bytes == 0)
843 bytes = sdp->sd_sb.sb_bsize;
844 goto retry;
845 }
846 goto out_qunlock; 864 goto out_qunlock;
847 } 865
848 max_bytes = bytes; 866 /* check if the selected rgrp limits our max_blks further */
849 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, 867 if (ap.allowed && ap.allowed < max_blks)
850 &max_bytes, &data_blocks, &ind_blocks); 868 max_blks = ap.allowed;
869
870 /* Almost done. Calculate bytes that can be written using
871 * max_blks. We also recompute max_bytes, data_blocks and
872 * ind_blocks */
873 calc_max_reserv(ip, &max_bytes, &data_blocks,
874 &ind_blocks, max_blks);
851 875
852 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 876 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
853 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); 877 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -931,6 +955,22 @@ out_uninit:
931 return ret; 955 return ret;
932} 956}
933 957
958static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
959 struct file *out, loff_t *ppos,
960 size_t len, unsigned int flags)
961{
962 int error;
963 struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
964
965 error = gfs2_rs_alloc(ip);
966 if (error)
967 return (ssize_t)error;
968
969 gfs2_size_hint(out, *ppos, len);
970
971 return iter_file_splice_write(pipe, out, ppos, len, flags);
972}
973
934#ifdef CONFIG_GFS2_FS_LOCKING_DLM 974#ifdef CONFIG_GFS2_FS_LOCKING_DLM
935 975
936/** 976/**
@@ -1077,7 +1117,7 @@ const struct file_operations gfs2_file_fops = {
1077 .lock = gfs2_lock, 1117 .lock = gfs2_lock,
1078 .flock = gfs2_flock, 1118 .flock = gfs2_flock,
1079 .splice_read = generic_file_splice_read, 1119 .splice_read = generic_file_splice_read,
1080 .splice_write = iter_file_splice_write, 1120 .splice_write = gfs2_file_splice_write,
1081 .setlease = simple_nosetlease, 1121 .setlease = simple_nosetlease,
1082 .fallocate = gfs2_fallocate, 1122 .fallocate = gfs2_fallocate,
1083}; 1123};
@@ -1107,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = {
1107 .release = gfs2_release, 1147 .release = gfs2_release,
1108 .fsync = gfs2_fsync, 1148 .fsync = gfs2_fsync,
1109 .splice_read = generic_file_splice_read, 1149 .splice_read = generic_file_splice_read,
1110 .splice_write = iter_file_splice_write, 1150 .splice_write = gfs2_file_splice_write,
1111 .setlease = generic_setlease, 1151 .setlease = generic_setlease,
1112 .fallocate = gfs2_fallocate, 1152 .fallocate = gfs2_fallocate,
1113}; 1153};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
2047 2047
2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2049{ 2049{
2050 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2050 struct dentry *dent;
2051 if (!sdp->debugfs_dir) 2051
2052 return -ENOMEM; 2052 dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2053 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2053 if (IS_ERR_OR_NULL(dent))
2054 S_IFREG | S_IRUGO, 2054 goto fail;
2055 sdp->debugfs_dir, sdp, 2055 sdp->debugfs_dir = dent;
2056 &gfs2_glocks_fops); 2056
2057 if (!sdp->debugfs_dentry_glocks) 2057 dent = debugfs_create_file("glocks",
2058 S_IFREG | S_IRUGO,
2059 sdp->debugfs_dir, sdp,
2060 &gfs2_glocks_fops);
2061 if (IS_ERR_OR_NULL(dent))
2058 goto fail; 2062 goto fail;
2063 sdp->debugfs_dentry_glocks = dent;
2059 2064
2060 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", 2065 dent = debugfs_create_file("glstats",
2061 S_IFREG | S_IRUGO, 2066 S_IFREG | S_IRUGO,
2062 sdp->debugfs_dir, sdp, 2067 sdp->debugfs_dir, sdp,
2063 &gfs2_glstats_fops); 2068 &gfs2_glstats_fops);
2064 if (!sdp->debugfs_dentry_glstats) 2069 if (IS_ERR_OR_NULL(dent))
2065 goto fail; 2070 goto fail;
2071 sdp->debugfs_dentry_glstats = dent;
2066 2072
2067 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", 2073 dent = debugfs_create_file("sbstats",
2068 S_IFREG | S_IRUGO, 2074 S_IFREG | S_IRUGO,
2069 sdp->debugfs_dir, sdp, 2075 sdp->debugfs_dir, sdp,
2070 &gfs2_sbstats_fops); 2076 &gfs2_sbstats_fops);
2071 if (!sdp->debugfs_dentry_sbstats) 2077 if (IS_ERR_OR_NULL(dent))
2072 goto fail; 2078 goto fail;
2079 sdp->debugfs_dentry_sbstats = dent;
2073 2080
2074 return 0; 2081 return 0;
2075fail: 2082fail:
2076 gfs2_delete_debugfs_file(sdp); 2083 gfs2_delete_debugfs_file(sdp);
2077 return -ENOMEM; 2084 return dent ? PTR_ERR(dent) : -ENOMEM;
2078} 2085}
2079 2086
2080void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2087void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2100int gfs2_register_debugfs(void) 2107int gfs2_register_debugfs(void)
2101{ 2108{
2102 gfs2_root = debugfs_create_dir("gfs2", NULL); 2109 gfs2_root = debugfs_create_dir("gfs2", NULL);
2110 if (IS_ERR(gfs2_root))
2111 return PTR_ERR(gfs2_root);
2103 return gfs2_root ? 0 : -ENOMEM; 2112 return gfs2_root ? 0 : -ENOMEM;
2104} 2113}
2105 2114
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
301 * to the allocation code. 301 * to the allocation code.
302 */ 302 */
303struct gfs2_alloc_parms { 303struct gfs2_alloc_parms {
304 u32 target; 304 u64 target;
305 u32 min_target;
305 u32 aflags; 306 u32 aflags;
307 u64 allowed;
306}; 308};
307 309
308enum { 310enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; 382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
383 int error; 383 int error;
384 384
385 error = gfs2_quota_lock_check(ip); 385 error = gfs2_quota_lock_check(ip, &ap);
386 if (error) 386 if (error)
387 goto out; 387 goto out;
388 388
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
525 int error; 525 int error;
526 526
527 if (da->nr_blocks) { 527 if (da->nr_blocks) {
528 error = gfs2_quota_lock_check(dip); 528 error = gfs2_quota_lock_check(dip, &ap);
529 if (error) 529 if (error)
530 goto fail_quota_locks; 530 goto fail_quota_locks;
531 531
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
953 953
954 if (da.nr_blocks) { 954 if (da.nr_blocks) {
955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
956 error = gfs2_quota_lock_check(dip); 956 error = gfs2_quota_lock_check(dip, &ap);
957 if (error) 957 if (error)
958 goto out_gunlock; 958 goto out_gunlock;
959 959
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1470 1470
1471 if (da.nr_blocks) { 1471 if (da.nr_blocks) {
1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
1473 error = gfs2_quota_lock_check(ndip); 1473 error = gfs2_quota_lock_check(ndip, &ap);
1474 if (error) 1474 if (error)
1475 goto out_gunlock; 1475 goto out_gunlock;
1476 1476
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1669 kuid_t ouid, nuid; 1669 kuid_t ouid, nuid;
1670 kgid_t ogid, ngid; 1670 kgid_t ogid, ngid;
1671 int error; 1671 int error;
1672 struct gfs2_alloc_parms ap;
1672 1673
1673 ouid = inode->i_uid; 1674 ouid = inode->i_uid;
1674 ogid = inode->i_gid; 1675 ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1696 if (error) 1697 if (error)
1697 goto out; 1698 goto out;
1698 1699
1700 ap.target = gfs2_get_inode_blocks(&ip->i_inode);
1701
1699 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1702 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1700 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1703 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1701 error = gfs2_quota_check(ip, nuid, ngid); 1704 error = gfs2_quota_check(ip, nuid, ngid, &ap);
1702 if (error) 1705 if (error)
1703 goto out_gunlock_q; 1706 goto out_gunlock_q;
1704 } 1707 }
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1713 1716
1714 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1717 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1715 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1718 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1716 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1719 gfs2_quota_change(ip, -ap.target, ouid, ogid);
1717 gfs2_quota_change(ip, -blocks, ouid, ogid); 1720 gfs2_quota_change(ip, ap.target, nuid, ngid);
1718 gfs2_quota_change(ip, blocks, nuid, ngid);
1719 } 1721 }
1720 1722
1721out_end_trans: 1723out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..5c27e48aa76f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
923 if (error) 923 if (error)
924 return error; 924 return error;
925 925
926 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
927 force_refresh = FORCE;
928
926 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 929 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
927 930
928 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 931 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
974 sizeof(struct gfs2_quota_data *), sort_qd, NULL); 977 sizeof(struct gfs2_quota_data *), sort_qd, NULL);
975 978
976 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 979 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
977 int force = NO_FORCE;
978 qd = ip->i_res->rs_qa_qd[x]; 980 qd = ip->i_res->rs_qa_qd[x];
979 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 981 error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
980 force = FORCE;
981 error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
982 if (error) 982 if (error)
983 break; 983 break;
984 } 984 }
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1094 return 0; 1094 return 0;
1095} 1095}
1096 1096
1097int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) 1097/**
1098 * gfs2_quota_check - check if allocating new blocks will exceed quota
1099 * @ip: The inode for which this check is being performed
1100 * @uid: The uid to check against
1101 * @gid: The gid to check against
1102 * @ap: The allocation parameters. ap->target contains the requested
1103 * blocks. ap->min_target, if set, contains the minimum blks
1104 * requested.
1105 *
1106 * Returns: 0 on success.
1107 * min_req = ap->min_target ? ap->min_target : ap->target;
1108 * quota must allow atleast min_req blks for success and
1109 * ap->allowed is set to the number of blocks allowed
1110 *
1111 * -EDQUOT otherwise, quota violation. ap->allowed is set to number
1112 * of blocks available.
1113 */
1114int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
1115 struct gfs2_alloc_parms *ap)
1098{ 1116{
1099 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1117 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1100 struct gfs2_quota_data *qd; 1118 struct gfs2_quota_data *qd;
1101 s64 value; 1119 s64 value, warn, limit;
1102 unsigned int x; 1120 unsigned int x;
1103 int error = 0; 1121 int error = 0;
1104 1122
1123 ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
1105 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) 1124 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
1106 return 0; 1125 return 0;
1107 1126
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1115 qid_eq(qd->qd_id, make_kqid_gid(gid)))) 1134 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1116 continue; 1135 continue;
1117 1136
1137 warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
1138 limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
1118 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1139 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
1119 spin_lock(&qd_lock); 1140 spin_lock(&qd_lock);
1120 value += qd->qd_change; 1141 value += qd->qd_change;
1121 spin_unlock(&qd_lock); 1142 spin_unlock(&qd_lock);
1122 1143
1123 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1144 if (limit > 0 && (limit - value) < ap->allowed)
1124 print_message(qd, "exceeded"); 1145 ap->allowed = limit - value;
1125 quota_send_warning(qd->qd_id, 1146 /* If we can't meet the target */
1126 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1147 if (limit && limit < (value + (s64)ap->target)) {
1127 1148 /* If no min_target specified or we don't meet
1128 error = -EDQUOT; 1149 * min_target, return -EDQUOT */
1129 break; 1150 if (!ap->min_target || ap->min_target > ap->allowed) {
1130 } else if (be64_to_cpu(qd->qd_qb.qb_warn) && 1151 print_message(qd, "exceeded");
1131 (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && 1152 quota_send_warning(qd->qd_id,
1153 sdp->sd_vfs->s_dev,
1154 QUOTA_NL_BHARDWARN);
1155 error = -EDQUOT;
1156 break;
1157 }
1158 } else if (warn && warn < value &&
1132 time_after_eq(jiffies, qd->qd_last_warn + 1159 time_after_eq(jiffies, qd->qd_last_warn +
1133 gfs2_tune_get(sdp, 1160 gfs2_tune_get(sdp, gt_quota_warn_period)
1134 gt_quota_warn_period) * HZ)) { 1161 * HZ)) {
1135 quota_send_warning(qd->qd_id, 1162 quota_send_warning(qd->qd_id,
1136 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1163 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1137 error = print_message(qd, "warning"); 1164 error = print_message(qd, "warning");
1138 qd->qd_last_warn = jiffies; 1165 qd->qd_last_warn = jiffies;
1139 } 1166 }
1140 } 1167 }
1141
1142 return error; 1168 return error;
1143} 1169}
1144 1170
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
24extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); 24extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
25extern void gfs2_quota_unlock(struct gfs2_inode *ip); 25extern void gfs2_quota_unlock(struct gfs2_inode *ip);
26 26
27extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); 27extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
28 struct gfs2_alloc_parms *ap);
28extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 29extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
29 kuid_t uid, kgid_t gid); 30 kuid_t uid, kgid_t gid);
30 31
@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);
37 38
38extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); 39extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
39 40
40static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) 41static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
42 struct gfs2_alloc_parms *ap)
41{ 43{
42 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 44 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
43 int ret; 45 int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
48 return ret; 50 return ret;
49 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 51 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
50 return 0; 52 return 0;
51 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); 53 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
52 if (ret) 54 if (ret)
53 gfs2_quota_unlock(ip); 55 gfs2_quota_unlock(ip);
54 return ret; 56 return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1946 * @ip: the inode to reserve space for 1946 * @ip: the inode to reserve space for
1947 * @ap: the allocation parameters 1947 * @ap: the allocation parameters
1948 * 1948 *
1949 * Returns: errno 1949 * We try our best to find an rgrp that has at least ap->target blocks
1950 * available. After a couple of passes (loops == 2), the prospects of finding
1951 * such an rgrp diminish. At this stage, we return the first rgrp that has
1952 * atleast ap->min_target blocks available. Either way, we set ap->allowed to
1953 * the number of blocks available in the chosen rgrp.
1954 *
1955 * Returns: 0 on success,
1956 * -ENOMEM if a suitable rgrp can't be found
1957 * errno otherwise
1950 */ 1958 */
1951 1959
1952int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) 1960int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
1953{ 1961{
1954 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1962 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1955 struct gfs2_rgrpd *begin = NULL; 1963 struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
2012 /* Skip unuseable resource groups */ 2020 /* Skip unuseable resource groups */
2013 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | 2021 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2014 GFS2_RDF_ERROR)) || 2022 GFS2_RDF_ERROR)) ||
2015 (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) 2023 (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2016 goto skip_rgrp; 2024 goto skip_rgrp;
2017 2025
2018 if (sdp->sd_args.ar_rgrplvb) 2026 if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
2027 goto check_rgrp; 2035 goto check_rgrp;
2028 2036
2029 /* If rgrp has enough free space, use it */ 2037 /* If rgrp has enough free space, use it */
2030 if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { 2038 if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
2039 (loops == 2 && ap->min_target &&
2040 rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
2031 ip->i_rgd = rs->rs_rbm.rgd; 2041 ip->i_rgd = rs->rs_rbm.rgd;
2042 ap->allowed = ip->i_rgd->rd_free_clone;
2032 return 0; 2043 return 0;
2033 } 2044 }
2034
2035check_rgrp: 2045check_rgrp:
2036 /* Check for unlinked inodes which can be reclaimed */ 2046 /* Check for unlinked inodes which can be reclaimed */
2037 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) 2047 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
41extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 41extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
42 42
43#define GFS2_AF_ORLOV 1 43#define GFS2_AF_ORLOV 1
44extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); 44extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
45 struct gfs2_alloc_parms *ap);
45extern void gfs2_inplace_release(struct gfs2_inode *ip); 46extern void gfs2_inplace_release(struct gfs2_inode *ip);
46 47
47extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 48extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
732 if (error) 732 if (error)
733 return error; 733 return error;
734 734
735 error = gfs2_quota_lock_check(ip); 735 error = gfs2_quota_lock_check(ip, &ap);
736 if (error) 736 if (error)
737 return error; 737 return error;
738 738
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc81782..98d4ea45bb70 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/aio.h> 17#include <linux/uio.h>
18 18
19#include "hfs_fs.h" 19#include "hfs_fs.h"
20#include "btree.h" 20#include "btree.h"
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f2d046..f541196d4ee9 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/aio.h> 17#include <linux/uio.h>
18 18
19#include "hfsplus_fs.h" 19#include "hfsplus_fs.h"
20#include "hfsplus_raw.h" 20#include "hfsplus_raw.h"
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817ac51f6..762c7a3cf43d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
195 /* unchecked xdatum is chained with c->xattr_unchecked */ 195 /* unchecked xdatum is chained with c->xattr_unchecked */
196 list_del_init(&xd->xindex); 196 list_del_init(&xd->xindex);
197 197
198 dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", 198 dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
199 xd->xid, xd->version); 199 xd->xid, xd->version);
200 200
201 return 0; 201 return 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1ca3c9b..3197aed10614 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/quotaops.h> 24#include <linux/quotaops.h>
25#include <linux/uio.h>
25#include <linux/writeback.h> 26#include <linux/writeback.h>
26#include <linux/aio.h>
27#include "jfs_incore.h" 27#include "jfs_incore.h"
28#include "jfs_inode.h" 28#include "jfs_inode.h"
29#include "jfs_filsys.h" 29#include "jfs_filsys.h"
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
102 vaf.fmt = fmt; 102 vaf.fmt = fmt;
103 vaf.va = &args; 103 vaf.va = &args;
104 104
105 pr_err("ERROR: (device %s): %pf: %pV\n", 105 pr_err("ERROR: (device %s): %ps: %pV\n",
106 sb->s_id, __builtin_return_address(0), &vaf); 106 sb->s_id, __builtin_return_address(0), &vaf);
107 107
108 va_end(args); 108 va_end(args);
diff --git a/fs/namei.c b/fs/namei.c
index c83145af4bfc..76fb76a0818b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
119 * PATH_MAX includes the nul terminator --RR. 119 * PATH_MAX includes the nul terminator --RR.
120 */ 120 */
121 121
122#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) 122#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
123 123
124struct filename * 124struct filename *
125getname_flags(const char __user *filename, int flags, int *empty) 125getname_flags(const char __user *filename, int flags, int *empty)
126{ 126{
127 struct filename *result, *err; 127 struct filename *result;
128 int len;
129 long max;
130 char *kname; 128 char *kname;
129 int len;
131 130
132 result = audit_reusename(filename); 131 result = audit_reusename(filename);
133 if (result) 132 if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
136 result = __getname(); 135 result = __getname();
137 if (unlikely(!result)) 136 if (unlikely(!result))
138 return ERR_PTR(-ENOMEM); 137 return ERR_PTR(-ENOMEM);
139 result->refcnt = 1;
140 138
141 /* 139 /*
142 * First, try to embed the struct filename inside the names_cache 140 * First, try to embed the struct filename inside the names_cache
143 * allocation 141 * allocation
144 */ 142 */
145 kname = (char *)result + sizeof(*result); 143 kname = (char *)result->iname;
146 result->name = kname; 144 result->name = kname;
147 result->separate = false;
148 max = EMBEDDED_NAME_MAX;
149 145
150recopy: 146 len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
151 len = strncpy_from_user(kname, filename, max);
152 if (unlikely(len < 0)) { 147 if (unlikely(len < 0)) {
153 err = ERR_PTR(len); 148 __putname(result);
154 goto error; 149 return ERR_PTR(len);
155 } 150 }
156 151
157 /* 152 /*
@@ -160,43 +155,49 @@ recopy:
160 * names_cache allocation for the pathname, and re-do the copy from 155 * names_cache allocation for the pathname, and re-do the copy from
161 * userland. 156 * userland.
162 */ 157 */
163 if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { 158 if (unlikely(len == EMBEDDED_NAME_MAX)) {
159 const size_t size = offsetof(struct filename, iname[1]);
164 kname = (char *)result; 160 kname = (char *)result;
165 161
166 result = kzalloc(sizeof(*result), GFP_KERNEL); 162 /*
167 if (!result) { 163 * size is chosen that way we to guarantee that
168 err = ERR_PTR(-ENOMEM); 164 * result->iname[0] is within the same object and that
169 result = (struct filename *)kname; 165 * kname can't be equal to result->iname, no matter what.
170 goto error; 166 */
167 result = kzalloc(size, GFP_KERNEL);
168 if (unlikely(!result)) {
169 __putname(kname);
170 return ERR_PTR(-ENOMEM);
171 } 171 }
172 result->name = kname; 172 result->name = kname;
173 result->separate = true; 173 len = strncpy_from_user(kname, filename, PATH_MAX);
174 result->refcnt = 1; 174 if (unlikely(len < 0)) {
175 max = PATH_MAX; 175 __putname(kname);
176 goto recopy; 176 kfree(result);
177 return ERR_PTR(len);
178 }
179 if (unlikely(len == PATH_MAX)) {
180 __putname(kname);
181 kfree(result);
182 return ERR_PTR(-ENAMETOOLONG);
183 }
177 } 184 }
178 185
186 result->refcnt = 1;
179 /* The empty path is special. */ 187 /* The empty path is special. */
180 if (unlikely(!len)) { 188 if (unlikely(!len)) {
181 if (empty) 189 if (empty)
182 *empty = 1; 190 *empty = 1;
183 err = ERR_PTR(-ENOENT); 191 if (!(flags & LOOKUP_EMPTY)) {
184 if (!(flags & LOOKUP_EMPTY)) 192 putname(result);
185 goto error; 193 return ERR_PTR(-ENOENT);
194 }
186 } 195 }
187 196
188 err = ERR_PTR(-ENAMETOOLONG);
189 if (unlikely(len >= PATH_MAX))
190 goto error;
191
192 result->uptr = filename; 197 result->uptr = filename;
193 result->aname = NULL; 198 result->aname = NULL;
194 audit_getname(result); 199 audit_getname(result);
195 return result; 200 return result;
196
197error:
198 putname(result);
199 return err;
200} 201}
201 202
202struct filename * 203struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
216 return ERR_PTR(-ENOMEM); 217 return ERR_PTR(-ENOMEM);
217 218
218 if (len <= EMBEDDED_NAME_MAX) { 219 if (len <= EMBEDDED_NAME_MAX) {
219 result->name = (char *)(result) + sizeof(*result); 220 result->name = (char *)result->iname;
220 result->separate = false;
221 } else if (len <= PATH_MAX) { 221 } else if (len <= PATH_MAX) {
222 struct filename *tmp; 222 struct filename *tmp;
223 223
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
227 return ERR_PTR(-ENOMEM); 227 return ERR_PTR(-ENOMEM);
228 } 228 }
229 tmp->name = (char *)result; 229 tmp->name = (char *)result;
230 tmp->separate = true;
231 result = tmp; 230 result = tmp;
232 } else { 231 } else {
233 __putname(result); 232 __putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
249 if (--name->refcnt > 0) 248 if (--name->refcnt > 0)
250 return; 249 return;
251 250
252 if (name->separate) { 251 if (name->name != name->iname) {
253 __putname(name->name); 252 __putname(name->name);
254 kfree(name); 253 kfree(name);
255 } else 254 } else
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1851 return err; 1850 return err;
1852} 1851}
1853 1852
1854static int path_init(int dfd, const char *name, unsigned int flags, 1853static int path_init(int dfd, const struct filename *name, unsigned int flags,
1855 struct nameidata *nd) 1854 struct nameidata *nd)
1856{ 1855{
1857 int retval = 0; 1856 int retval = 0;
1857 const char *s = name->name;
1858 1858
1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; 1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1863 if (flags & LOOKUP_ROOT) { 1863 if (flags & LOOKUP_ROOT) {
1864 struct dentry *root = nd->root.dentry; 1864 struct dentry *root = nd->root.dentry;
1865 struct inode *inode = root->d_inode; 1865 struct inode *inode = root->d_inode;
1866 if (*name) { 1866 if (*s) {
1867 if (!d_can_lookup(root)) 1867 if (!d_can_lookup(root))
1868 return -ENOTDIR; 1868 return -ENOTDIR;
1869 retval = inode_permission(inode, MAY_EXEC); 1869 retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1885 nd->root.mnt = NULL; 1885 nd->root.mnt = NULL;
1886 1886
1887 nd->m_seq = read_seqbegin(&mount_lock); 1887 nd->m_seq = read_seqbegin(&mount_lock);
1888 if (*name=='/') { 1888 if (*s == '/') {
1889 if (flags & LOOKUP_RCU) { 1889 if (flags & LOOKUP_RCU) {
1890 rcu_read_lock(); 1890 rcu_read_lock();
1891 nd->seq = set_root_rcu(nd); 1891 nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1919 1919
1920 dentry = f.file->f_path.dentry; 1920 dentry = f.file->f_path.dentry;
1921 1921
1922 if (*name) { 1922 if (*s) {
1923 if (!d_can_lookup(dentry)) { 1923 if (!d_can_lookup(dentry)) {
1924 fdput(f); 1924 fdput(f);
1925 return -ENOTDIR; 1925 return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1949 return -ECHILD; 1949 return -ECHILD;
1950done: 1950done:
1951 current->total_link_count = 0; 1951 current->total_link_count = 0;
1952 return link_path_walk(name, nd); 1952 return link_path_walk(s, nd);
1953} 1953}
1954 1954
1955static void path_cleanup(struct nameidata *nd) 1955static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
1972} 1972}
1973 1973
1974/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1974/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1975static int path_lookupat(int dfd, const char *name, 1975static int path_lookupat(int dfd, const struct filename *name,
1976 unsigned int flags, struct nameidata *nd) 1976 unsigned int flags, struct nameidata *nd)
1977{ 1977{
1978 struct path path; 1978 struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
2027static int filename_lookup(int dfd, struct filename *name, 2027static int filename_lookup(int dfd, struct filename *name,
2028 unsigned int flags, struct nameidata *nd) 2028 unsigned int flags, struct nameidata *nd)
2029{ 2029{
2030 int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); 2030 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
2031 if (unlikely(retval == -ECHILD)) 2031 if (unlikely(retval == -ECHILD))
2032 retval = path_lookupat(dfd, name->name, flags, nd); 2032 retval = path_lookupat(dfd, name, flags, nd);
2033 if (unlikely(retval == -ESTALE)) 2033 if (unlikely(retval == -ESTALE))
2034 retval = path_lookupat(dfd, name->name, 2034 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
2035 flags | LOOKUP_REVAL, nd);
2036 2035
2037 if (likely(!retval)) 2036 if (likely(!retval))
2038 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); 2037 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
2039 return retval; 2038 return retval;
2040} 2039}
2041 2040
2042static int do_path_lookup(int dfd, const char *name,
2043 unsigned int flags, struct nameidata *nd)
2044{
2045 struct filename *filename = getname_kernel(name);
2046 int retval = PTR_ERR(filename);
2047
2048 if (!IS_ERR(filename)) {
2049 retval = filename_lookup(dfd, filename, flags, nd);
2050 putname(filename);
2051 }
2052 return retval;
2053}
2054
2055/* does lookup, returns the object with parent locked */ 2041/* does lookup, returns the object with parent locked */
2056struct dentry *kern_path_locked(const char *name, struct path *path) 2042struct dentry *kern_path_locked(const char *name, struct path *path)
2057{ 2043{
@@ -2089,9 +2075,15 @@ out:
2089int kern_path(const char *name, unsigned int flags, struct path *path) 2075int kern_path(const char *name, unsigned int flags, struct path *path)
2090{ 2076{
2091 struct nameidata nd; 2077 struct nameidata nd;
2092 int res = do_path_lookup(AT_FDCWD, name, flags, &nd); 2078 struct filename *filename = getname_kernel(name);
2093 if (!res) 2079 int res = PTR_ERR(filename);
2094 *path = nd.path; 2080
2081 if (!IS_ERR(filename)) {
2082 res = filename_lookup(AT_FDCWD, filename, flags, &nd);
2083 putname(filename);
2084 if (!res)
2085 *path = nd.path;
2086 }
2095 return res; 2087 return res;
2096} 2088}
2097EXPORT_SYMBOL(kern_path); 2089EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2108 const char *name, unsigned int flags, 2100 const char *name, unsigned int flags,
2109 struct path *path) 2101 struct path *path)
2110{ 2102{
2111 struct nameidata nd; 2103 struct filename *filename = getname_kernel(name);
2112 int err; 2104 int err = PTR_ERR(filename);
2113 nd.root.dentry = dentry; 2105
2114 nd.root.mnt = mnt;
2115 BUG_ON(flags & LOOKUP_PARENT); 2106 BUG_ON(flags & LOOKUP_PARENT);
2116 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 2107
2117 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); 2108 /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
2118 if (!err) 2109 if (!IS_ERR(filename)) {
2119 *path = nd.path; 2110 struct nameidata nd;
2111 nd.root.dentry = dentry;
2112 nd.root.mnt = mnt;
2113 err = filename_lookup(AT_FDCWD, filename,
2114 flags | LOOKUP_ROOT, &nd);
2115 if (!err)
2116 *path = nd.path;
2117 putname(filename);
2118 }
2120 return err; 2119 return err;
2121} 2120}
2122EXPORT_SYMBOL(vfs_path_lookup); 2121EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
2138 * @len: maximum length @len should be interpreted to 2137 * @len: maximum length @len should be interpreted to
2139 * 2138 *
2140 * Note that this routine is purely a helper for filesystem usage and should 2139 * Note that this routine is purely a helper for filesystem usage and should
2141 * not be called by generic code. Also note that by using this function the 2140 * not be called by generic code.
2142 * nameidata argument is passed to the filesystem methods and a filesystem
2143 * using this helper needs to be prepared for that.
2144 */ 2141 */
2145struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 2142struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2146{ 2143{
@@ -2341,7 +2338,8 @@ out:
2341 * Returns 0 and "path" will be valid on success; Returns error otherwise. 2338 * Returns 0 and "path" will be valid on success; Returns error otherwise.
2342 */ 2339 */
2343static int 2340static int
2344path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) 2341path_mountpoint(int dfd, const struct filename *name, struct path *path,
2342 unsigned int flags)
2345{ 2343{
2346 struct nameidata nd; 2344 struct nameidata nd;
2347 int err; 2345 int err;
@@ -2370,20 +2368,20 @@ out:
2370} 2368}
2371 2369
2372static int 2370static int
2373filename_mountpoint(int dfd, struct filename *s, struct path *path, 2371filename_mountpoint(int dfd, struct filename *name, struct path *path,
2374 unsigned int flags) 2372 unsigned int flags)
2375{ 2373{
2376 int error; 2374 int error;
2377 if (IS_ERR(s)) 2375 if (IS_ERR(name))
2378 return PTR_ERR(s); 2376 return PTR_ERR(name);
2379 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); 2377 error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
2380 if (unlikely(error == -ECHILD)) 2378 if (unlikely(error == -ECHILD))
2381 error = path_mountpoint(dfd, s->name, path, flags); 2379 error = path_mountpoint(dfd, name, path, flags);
2382 if (unlikely(error == -ESTALE)) 2380 if (unlikely(error == -ESTALE))
2383 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); 2381 error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
2384 if (likely(!error)) 2382 if (likely(!error))
2385 audit_inode(s, path->dentry, 0); 2383 audit_inode(name, path->dentry, 0);
2386 putname(s); 2384 putname(name);
2387 return error; 2385 return error;
2388} 2386}
2389 2387
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
3156 static const struct qstr name = QSTR_INIT("/", 1); 3154 static const struct qstr name = QSTR_INIT("/", 1);
3157 struct dentry *dentry, *child; 3155 struct dentry *dentry, *child;
3158 struct inode *dir; 3156 struct inode *dir;
3159 int error = path_lookupat(dfd, pathname->name, 3157 int error = path_lookupat(dfd, pathname,
3160 flags | LOOKUP_DIRECTORY, nd); 3158 flags | LOOKUP_DIRECTORY, nd);
3161 if (unlikely(error)) 3159 if (unlikely(error))
3162 return error; 3160 return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3229 goto out; 3227 goto out;
3230 } 3228 }
3231 3229
3232 error = path_init(dfd, pathname->name, flags, nd); 3230 error = path_init(dfd, pathname, flags, nd);
3233 if (unlikely(error)) 3231 if (unlikely(error))
3234 goto out; 3232 goto out;
3235 3233
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8cf732e..c3929fb2ab26 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
265 265
266 return -EINVAL; 266 return -EINVAL;
267#else 267#else
268 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); 268 VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
269 269
270 if (rw == READ) 270 if (rw == READ)
271 return nfs_file_direct_read(iocb, iter, pos); 271 return nfs_file_direct_read(iocb, iter, pos);
@@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
393 long res = (long) dreq->error; 393 long res = (long) dreq->error;
394 if (!res) 394 if (!res)
395 res = (long) dreq->count; 395 res = (long) dreq->count;
396 aio_complete(dreq->iocb, res, 0); 396 dreq->iocb->ki_complete(dreq->iocb, res, 0);
397 } 397 }
398 398
399 complete_all(&dreq->completion); 399 complete_all(&dreq->completion);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e679d24c39d3..37b15582e0de 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
26#include <linux/nfs_mount.h> 26#include <linux/nfs_mount.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/aio.h>
30#include <linux/gfp.h> 29#include <linux/gfp.h>
31#include <linux/swap.h> 30#include <linux/swap.h>
32 31
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b5969538f39..ab4987bc637f 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/aio.h> 29#include <linux/uio.h>
30#include "nilfs.h" 30#include "nilfs.h"
31#include "btnode.h" 31#include "btnode.h"
32#include "segment.h" 32#include "segment.h"
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c4..2ff263e6d363 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
8 8
9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ccflags-y := -DNTFS_VERSION=\"2.1.31\" 11ccflags-y := -DNTFS_VERSION=\"2.1.32\"
12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d184dc..c1da78dad1af 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
28#include <linux/swap.h> 28#include <linux/swap.h>
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/writeback.h> 30#include <linux/writeback.h>
31#include <linux/aio.h>
32 31
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -329,62 +328,168 @@ err_out:
329 return err; 328 return err;
330} 329}
331 330
332/** 331static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
333 * ntfs_fault_in_pages_readable - 332 size_t *count)
334 *
335 * Fault a number of userspace pages into pagetables.
336 *
337 * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
338 * with more than two userspace pages as well as handling the single page case
339 * elegantly.
340 *
341 * If you find this difficult to understand, then think of the while loop being
342 * the following code, except that we do without the integer variable ret:
343 *
344 * do {
345 * ret = __get_user(c, uaddr);
346 * uaddr += PAGE_SIZE;
347 * } while (!ret && uaddr < end);
348 *
349 * Note, the final __get_user() may well run out-of-bounds of the user buffer,
350 * but _not_ out-of-bounds of the page the user buffer belongs to, and since
351 * this is only a read and not a write, and since it is still in the same page,
352 * it should not matter and this makes the code much simpler.
353 */
354static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
355 int bytes)
356{ 333{
357 const char __user *end; 334 loff_t pos;
358 volatile char c; 335 s64 end, ll;
359 336 ssize_t err;
360 /* Set @end to the first byte outside the last page we care about. */ 337 unsigned long flags;
361 end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); 338 struct inode *vi = file_inode(file);
362 339 ntfs_inode *base_ni, *ni = NTFS_I(vi);
363 while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) 340 ntfs_volume *vol = ni->vol;
364 ;
365}
366
367/**
368 * ntfs_fault_in_pages_readable_iovec -
369 *
370 * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
371 */
372static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
373 size_t iov_ofs, int bytes)
374{
375 do {
376 const char __user *buf;
377 unsigned len;
378 341
379 buf = iov->iov_base + iov_ofs; 342 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
380 len = iov->iov_len - iov_ofs; 343 "0x%llx, count 0x%lx.", vi->i_ino,
381 if (len > bytes) 344 (unsigned)le32_to_cpu(ni->type),
382 len = bytes; 345 (unsigned long long)*ppos, (unsigned long)*count);
383 ntfs_fault_in_pages_readable(buf, len); 346 /* We can write back this queue in page reclaim. */
384 bytes -= len; 347 current->backing_dev_info = inode_to_bdi(vi);
385 iov++; 348 err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
386 iov_ofs = 0; 349 if (unlikely(err))
387 } while (bytes); 350 goto out;
351 /*
352 * All checks have passed. Before we start doing any writing we want
353 * to abort any totally illegal writes.
354 */
355 BUG_ON(NInoMstProtected(ni));
356 BUG_ON(ni->type != AT_DATA);
357 /* If file is encrypted, deny access, just like NT4. */
358 if (NInoEncrypted(ni)) {
359 /* Only $DATA attributes can be encrypted. */
360 /*
361 * Reminder for later: Encrypted files are _always_
362 * non-resident so that the content can always be encrypted.
363 */
364 ntfs_debug("Denying write access to encrypted file.");
365 err = -EACCES;
366 goto out;
367 }
368 if (NInoCompressed(ni)) {
369 /* Only unnamed $DATA attribute can be compressed. */
370 BUG_ON(ni->name_len);
371 /*
372 * Reminder for later: If resident, the data is not actually
373 * compressed. Only on the switch to non-resident does
374 * compression kick in. This is in contrast to encrypted files
375 * (see above).
376 */
377 ntfs_error(vi->i_sb, "Writing to compressed files is not "
378 "implemented yet. Sorry.");
379 err = -EOPNOTSUPP;
380 goto out;
381 }
382 if (*count == 0)
383 goto out;
384 base_ni = ni;
385 if (NInoAttr(ni))
386 base_ni = ni->ext.base_ntfs_ino;
387 err = file_remove_suid(file);
388 if (unlikely(err))
389 goto out;
390 /*
391 * Our ->update_time method always succeeds thus file_update_time()
392 * cannot fail either so there is no need to check the return code.
393 */
394 file_update_time(file);
395 pos = *ppos;
396 /* The first byte after the last cluster being written to. */
397 end = (pos + *count + vol->cluster_size_mask) &
398 ~(u64)vol->cluster_size_mask;
399 /*
400 * If the write goes beyond the allocated size, extend the allocation
401 * to cover the whole of the write, rounded up to the nearest cluster.
402 */
403 read_lock_irqsave(&ni->size_lock, flags);
404 ll = ni->allocated_size;
405 read_unlock_irqrestore(&ni->size_lock, flags);
406 if (end > ll) {
407 /*
408 * Extend the allocation without changing the data size.
409 *
410 * Note we ensure the allocation is big enough to at least
411 * write some data but we do not require the allocation to be
412 * complete, i.e. it may be partial.
413 */
414 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
415 if (likely(ll >= 0)) {
416 BUG_ON(pos >= ll);
417 /* If the extension was partial truncate the write. */
418 if (end > ll) {
419 ntfs_debug("Truncating write to inode 0x%lx, "
420 "attribute type 0x%x, because "
421 "the allocation was only "
422 "partially extended.",
423 vi->i_ino, (unsigned)
424 le32_to_cpu(ni->type));
425 *count = ll - pos;
426 }
427 } else {
428 err = ll;
429 read_lock_irqsave(&ni->size_lock, flags);
430 ll = ni->allocated_size;
431 read_unlock_irqrestore(&ni->size_lock, flags);
432 /* Perform a partial write if possible or fail. */
433 if (pos < ll) {
434 ntfs_debug("Truncating write to inode 0x%lx "
435 "attribute type 0x%x, because "
436 "extending the allocation "
437 "failed (error %d).",
438 vi->i_ino, (unsigned)
439 le32_to_cpu(ni->type),
440 (int)-err);
441 *count = ll - pos;
442 } else {
443 if (err != -ENOSPC)
444 ntfs_error(vi->i_sb, "Cannot perform "
445 "write to inode "
446 "0x%lx, attribute "
447 "type 0x%x, because "
448 "extending the "
449 "allocation failed "
450 "(error %ld).",
451 vi->i_ino, (unsigned)
452 le32_to_cpu(ni->type),
453 (long)-err);
454 else
455 ntfs_debug("Cannot perform write to "
456 "inode 0x%lx, "
457 "attribute type 0x%x, "
458 "because there is not "
459 "space left.",
460 vi->i_ino, (unsigned)
461 le32_to_cpu(ni->type));
462 goto out;
463 }
464 }
465 }
466 /*
467 * If the write starts beyond the initialized size, extend it up to the
468 * beginning of the write and initialize all non-sparse space between
469 * the old initialized size and the new one. This automatically also
470 * increments the vfs inode->i_size to keep it above or equal to the
471 * initialized_size.
472 */
473 read_lock_irqsave(&ni->size_lock, flags);
474 ll = ni->initialized_size;
475 read_unlock_irqrestore(&ni->size_lock, flags);
476 if (pos > ll) {
477 /*
478 * Wait for ongoing direct i/o to complete before proceeding.
479 * New direct i/o cannot start as we hold i_mutex.
480 */
481 inode_dio_wait(vi);
482 err = ntfs_attr_extend_initialized(ni, pos);
483 if (unlikely(err < 0))
484 ntfs_error(vi->i_sb, "Cannot perform write to inode "
485 "0x%lx, attribute type 0x%x, because "
486 "extending the initialized size "
487 "failed (error %d).", vi->i_ino,
488 (unsigned)le32_to_cpu(ni->type),
489 (int)-err);
490 }
491out:
492 return err;
388} 493}
389 494
390/** 495/**
@@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
421 goto err_out; 526 goto err_out;
422 } 527 }
423 } 528 }
424 err = add_to_page_cache_lru(*cached_page, mapping, index, 529 err = add_to_page_cache_lru(*cached_page, mapping,
425 GFP_KERNEL); 530 index, GFP_KERNEL);
426 if (unlikely(err)) { 531 if (unlikely(err)) {
427 if (err == -EEXIST) 532 if (err == -EEXIST)
428 continue; 533 continue;
@@ -1268,180 +1373,6 @@ rl_not_mapped_enoent:
1268 return err; 1373 return err;
1269} 1374}
1270 1375
1271/*
1272 * Copy as much as we can into the pages and return the number of bytes which
1273 * were successfully copied. If a fault is encountered then clear the pages
1274 * out to (ofs + bytes) and return the number of bytes which were copied.
1275 */
1276static inline size_t ntfs_copy_from_user(struct page **pages,
1277 unsigned nr_pages, unsigned ofs, const char __user *buf,
1278 size_t bytes)
1279{
1280 struct page **last_page = pages + nr_pages;
1281 char *addr;
1282 size_t total = 0;
1283 unsigned len;
1284 int left;
1285
1286 do {
1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes)
1289 len = bytes;
1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr);
1293 if (unlikely(left)) {
1294 /* Do it the slow way. */
1295 addr = kmap(*pages);
1296 left = __copy_from_user(addr + ofs, buf, len);
1297 kunmap(*pages);
1298 if (unlikely(left))
1299 goto err_out;
1300 }
1301 total += len;
1302 bytes -= len;
1303 if (!bytes)
1304 break;
1305 buf += len;
1306 ofs = 0;
1307 } while (++pages < last_page);
1308out:
1309 return total;
1310err_out:
1311 total += len - left;
1312 /* Zero the rest of the target like __copy_from_user(). */
1313 while (++pages < last_page) {
1314 bytes -= len;
1315 if (!bytes)
1316 break;
1317 len = PAGE_CACHE_SIZE;
1318 if (len > bytes)
1319 len = bytes;
1320 zero_user(*pages, 0, len);
1321 }
1322 goto out;
1323}
1324
1325static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
1326 const struct iovec *iov, size_t iov_ofs, size_t bytes)
1327{
1328 size_t total = 0;
1329
1330 while (1) {
1331 const char __user *buf = iov->iov_base + iov_ofs;
1332 unsigned len;
1333 size_t left;
1334
1335 len = iov->iov_len - iov_ofs;
1336 if (len > bytes)
1337 len = bytes;
1338 left = __copy_from_user_inatomic(vaddr, buf, len);
1339 total += len;
1340 bytes -= len;
1341 vaddr += len;
1342 if (unlikely(left)) {
1343 total -= left;
1344 break;
1345 }
1346 if (!bytes)
1347 break;
1348 iov++;
1349 iov_ofs = 0;
1350 }
1351 return total;
1352}
1353
1354static inline void ntfs_set_next_iovec(const struct iovec **iovp,
1355 size_t *iov_ofsp, size_t bytes)
1356{
1357 const struct iovec *iov = *iovp;
1358 size_t iov_ofs = *iov_ofsp;
1359
1360 while (bytes) {
1361 unsigned len;
1362
1363 len = iov->iov_len - iov_ofs;
1364 if (len > bytes)
1365 len = bytes;
1366 bytes -= len;
1367 iov_ofs += len;
1368 if (iov->iov_len == iov_ofs) {
1369 iov++;
1370 iov_ofs = 0;
1371 }
1372 }
1373 *iovp = iov;
1374 *iov_ofsp = iov_ofs;
1375}
1376
1377/*
1378 * This has the same side-effects and return value as ntfs_copy_from_user().
1379 * The difference is that on a fault we need to memset the remainder of the
1380 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
1381 * single-segment behaviour.
1382 *
1383 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
1384 * atomic and when not atomic. This is ok because it calls
1385 * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
1386 * fact, the only difference between __copy_from_user_inatomic() and
1387 * __copy_from_user() is that the latter calls might_sleep() and the former
1388 * should not zero the tail of the buffer on error. And on many architectures
1389 * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
1390 * makes no difference at all on those architectures.
1391 */
1392static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1393 unsigned nr_pages, unsigned ofs, const struct iovec **iov,
1394 size_t *iov_ofs, size_t bytes)
1395{
1396 struct page **last_page = pages + nr_pages;
1397 char *addr;
1398 size_t copied, len, total = 0;
1399
1400 do {
1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes)
1403 len = bytes;
1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */
1410 addr = kmap(*pages);
1411 copied = __ntfs_copy_from_user_iovec_inatomic(addr +
1412 ofs, *iov, *iov_ofs, len);
1413 if (unlikely(copied != len))
1414 goto err_out;
1415 kunmap(*pages);
1416 }
1417 total += len;
1418 ntfs_set_next_iovec(iov, iov_ofs, len);
1419 bytes -= len;
1420 if (!bytes)
1421 break;
1422 ofs = 0;
1423 } while (++pages < last_page);
1424out:
1425 return total;
1426err_out:
1427 BUG_ON(copied > len);
1428 /* Zero the rest of the target like __copy_from_user(). */
1429 memset(addr + ofs + copied, 0, len - copied);
1430 kunmap(*pages);
1431 total += copied;
1432 ntfs_set_next_iovec(iov, iov_ofs, copied);
1433 while (++pages < last_page) {
1434 bytes -= len;
1435 if (!bytes)
1436 break;
1437 len = PAGE_CACHE_SIZE;
1438 if (len > bytes)
1439 len = bytes;
1440 zero_user(*pages, 0, len);
1441 }
1442 goto out;
1443}
1444
1445static inline void ntfs_flush_dcache_pages(struct page **pages, 1376static inline void ntfs_flush_dcache_pages(struct page **pages,
1446 unsigned nr_pages) 1377 unsigned nr_pages)
1447{ 1378{
@@ -1762,86 +1693,83 @@ err_out:
1762 return err; 1693 return err;
1763} 1694}
1764 1695
1765static void ntfs_write_failed(struct address_space *mapping, loff_t to) 1696/*
1697 * Copy as much as we can into the pages and return the number of bytes which
1698 * were successfully copied. If a fault is encountered then clear the pages
1699 * out to (ofs + bytes) and return the number of bytes which were copied.
1700 */
1701static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
1702 unsigned ofs, struct iov_iter *i, size_t bytes)
1766{ 1703{
1767 struct inode *inode = mapping->host; 1704 struct page **last_page = pages + nr_pages;
1705 size_t total = 0;
1706 struct iov_iter data = *i;
1707 unsigned len, copied;
1768 1708
1769 if (to > inode->i_size) { 1709 do {
1770 truncate_pagecache(inode, inode->i_size); 1710 len = PAGE_CACHE_SIZE - ofs;
1771 ntfs_truncate_vfs(inode); 1711 if (len > bytes)
1772 } 1712 len = bytes;
1713 copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
1714 len);
1715 total += copied;
1716 bytes -= copied;
1717 if (!bytes)
1718 break;
1719 iov_iter_advance(&data, copied);
1720 if (copied < len)
1721 goto err;
1722 ofs = 0;
1723 } while (++pages < last_page);
1724out:
1725 return total;
1726err:
1727 /* Zero the rest of the target like __copy_from_user(). */
1728 len = PAGE_CACHE_SIZE - copied;
1729 do {
1730 if (len > bytes)
1731 len = bytes;
1732 zero_user(*pages, copied, len);
1733 bytes -= len;
1734 copied = 0;
1735 len = PAGE_CACHE_SIZE;
1736 } while (++pages < last_page);
1737 goto out;
1773} 1738}
1774 1739
1775/** 1740/**
1776 * ntfs_file_buffered_write - 1741 * ntfs_perform_write - perform buffered write to a file
1777 * 1742 * @file: file to write to
1778 * Locking: The vfs is holding ->i_mutex on the inode. 1743 * @i: iov_iter with data to write
1744 * @pos: byte offset in file at which to begin writing to
1779 */ 1745 */
1780static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, 1746static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
1781 const struct iovec *iov, unsigned long nr_segs, 1747 loff_t pos)
1782 loff_t pos, loff_t *ppos, size_t count)
1783{ 1748{
1784 struct file *file = iocb->ki_filp;
1785 struct address_space *mapping = file->f_mapping; 1749 struct address_space *mapping = file->f_mapping;
1786 struct inode *vi = mapping->host; 1750 struct inode *vi = mapping->host;
1787 ntfs_inode *ni = NTFS_I(vi); 1751 ntfs_inode *ni = NTFS_I(vi);
1788 ntfs_volume *vol = ni->vol; 1752 ntfs_volume *vol = ni->vol;
1789 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1753 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
1790 struct page *cached_page = NULL; 1754 struct page *cached_page = NULL;
1791 char __user *buf = NULL;
1792 s64 end, ll;
1793 VCN last_vcn; 1755 VCN last_vcn;
1794 LCN lcn; 1756 LCN lcn;
1795 unsigned long flags; 1757 size_t bytes;
1796 size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ 1758 ssize_t status, written = 0;
1797 ssize_t status, written;
1798 unsigned nr_pages; 1759 unsigned nr_pages;
1799 int err;
1800 1760
1801 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " 1761 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
1802 "pos 0x%llx, count 0x%lx.", 1762 "0x%llx, count 0x%lx.", vi->i_ino,
1803 vi->i_ino, (unsigned)le32_to_cpu(ni->type), 1763 (unsigned)le32_to_cpu(ni->type),
1804 (unsigned long long)pos, (unsigned long)count); 1764 (unsigned long long)pos,
1805 if (unlikely(!count)) 1765 (unsigned long)iov_iter_count(i));
1806 return 0;
1807 BUG_ON(NInoMstProtected(ni));
1808 /*
1809 * If the attribute is not an index root and it is encrypted or
1810 * compressed, we cannot write to it yet. Note we need to check for
1811 * AT_INDEX_ALLOCATION since this is the type of both directory and
1812 * index inodes.
1813 */
1814 if (ni->type != AT_INDEX_ALLOCATION) {
1815 /* If file is encrypted, deny access, just like NT4. */
1816 if (NInoEncrypted(ni)) {
1817 /*
1818 * Reminder for later: Encrypted files are _always_
1819 * non-resident so that the content can always be
1820 * encrypted.
1821 */
1822 ntfs_debug("Denying write access to encrypted file.");
1823 return -EACCES;
1824 }
1825 if (NInoCompressed(ni)) {
1826 /* Only unnamed $DATA attribute can be compressed. */
1827 BUG_ON(ni->type != AT_DATA);
1828 BUG_ON(ni->name_len);
1829 /*
1830 * Reminder for later: If resident, the data is not
1831 * actually compressed. Only on the switch to non-
1832 * resident does compression kick in. This is in
1833 * contrast to encrypted files (see above).
1834 */
1835 ntfs_error(vi->i_sb, "Writing to compressed files is "
1836 "not implemented yet. Sorry.");
1837 return -EOPNOTSUPP;
1838 }
1839 }
1840 /* 1766 /*
1841 * If a previous ntfs_truncate() failed, repeat it and abort if it 1767 * If a previous ntfs_truncate() failed, repeat it and abort if it
1842 * fails again. 1768 * fails again.
1843 */ 1769 */
1844 if (unlikely(NInoTruncateFailed(ni))) { 1770 if (unlikely(NInoTruncateFailed(ni))) {
1771 int err;
1772
1845 inode_dio_wait(vi); 1773 inode_dio_wait(vi);
1846 err = ntfs_truncate(vi); 1774 err = ntfs_truncate(vi);
1847 if (err || NInoTruncateFailed(ni)) { 1775 if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1855 return err; 1783 return err;
1856 } 1784 }
1857 } 1785 }
1858 /* The first byte after the write. */
1859 end = pos + count;
1860 /*
1861 * If the write goes beyond the allocated size, extend the allocation
1862 * to cover the whole of the write, rounded up to the nearest cluster.
1863 */
1864 read_lock_irqsave(&ni->size_lock, flags);
1865 ll = ni->allocated_size;
1866 read_unlock_irqrestore(&ni->size_lock, flags);
1867 if (end > ll) {
1868 /* Extend the allocation without changing the data size. */
1869 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
1870 if (likely(ll >= 0)) {
1871 BUG_ON(pos >= ll);
1872 /* If the extension was partial truncate the write. */
1873 if (end > ll) {
1874 ntfs_debug("Truncating write to inode 0x%lx, "
1875 "attribute type 0x%x, because "
1876 "the allocation was only "
1877 "partially extended.",
1878 vi->i_ino, (unsigned)
1879 le32_to_cpu(ni->type));
1880 end = ll;
1881 count = ll - pos;
1882 }
1883 } else {
1884 err = ll;
1885 read_lock_irqsave(&ni->size_lock, flags);
1886 ll = ni->allocated_size;
1887 read_unlock_irqrestore(&ni->size_lock, flags);
1888 /* Perform a partial write if possible or fail. */
1889 if (pos < ll) {
1890 ntfs_debug("Truncating write to inode 0x%lx, "
1891 "attribute type 0x%x, because "
1892 "extending the allocation "
1893 "failed (error code %i).",
1894 vi->i_ino, (unsigned)
1895 le32_to_cpu(ni->type), err);
1896 end = ll;
1897 count = ll - pos;
1898 } else {
1899 ntfs_error(vol->sb, "Cannot perform write to "
1900 "inode 0x%lx, attribute type "
1901 "0x%x, because extending the "
1902 "allocation failed (error "
1903 "code %i).", vi->i_ino,
1904 (unsigned)
1905 le32_to_cpu(ni->type), err);
1906 return err;
1907 }
1908 }
1909 }
1910 written = 0;
1911 /*
1912 * If the write starts beyond the initialized size, extend it up to the
1913 * beginning of the write and initialize all non-sparse space between
1914 * the old initialized size and the new one. This automatically also
1915 * increments the vfs inode->i_size to keep it above or equal to the
1916 * initialized_size.
1917 */
1918 read_lock_irqsave(&ni->size_lock, flags);
1919 ll = ni->initialized_size;
1920 read_unlock_irqrestore(&ni->size_lock, flags);
1921 if (pos > ll) {
1922 err = ntfs_attr_extend_initialized(ni, pos);
1923 if (err < 0) {
1924 ntfs_error(vol->sb, "Cannot perform write to inode "
1925 "0x%lx, attribute type 0x%x, because "
1926 "extending the initialized size "
1927 "failed (error code %i).", vi->i_ino,
1928 (unsigned)le32_to_cpu(ni->type), err);
1929 status = err;
1930 goto err_out;
1931 }
1932 }
1933 /* 1786 /*
1934 * Determine the number of pages per cluster for non-resident 1787 * Determine the number of pages per cluster for non-resident
1935 * attributes. 1788 * attributes.
@@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1937 nr_pages = 1; 1790 nr_pages = 1;
1938 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) 1791 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
1939 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; 1792 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
1940 /* Finally, perform the actual write. */
1941 last_vcn = -1; 1793 last_vcn = -1;
1942 if (likely(nr_segs == 1))
1943 buf = iov->iov_base;
1944 do { 1794 do {
1945 VCN vcn; 1795 VCN vcn;
1946 pgoff_t idx, start_idx; 1796 pgoff_t idx, start_idx;
@@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1965 vol->cluster_size_bits, false); 1815 vol->cluster_size_bits, false);
1966 up_read(&ni->runlist.lock); 1816 up_read(&ni->runlist.lock);
1967 if (unlikely(lcn < LCN_HOLE)) { 1817 if (unlikely(lcn < LCN_HOLE)) {
1968 status = -EIO;
1969 if (lcn == LCN_ENOMEM) 1818 if (lcn == LCN_ENOMEM)
1970 status = -ENOMEM; 1819 status = -ENOMEM;
1971 else 1820 else {
1821 status = -EIO;
1972 ntfs_error(vol->sb, "Cannot " 1822 ntfs_error(vol->sb, "Cannot "
1973 "perform write to " 1823 "perform write to "
1974 "inode 0x%lx, " 1824 "inode 0x%lx, "
@@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1977 "is corrupt.", 1827 "is corrupt.",
1978 vi->i_ino, (unsigned) 1828 vi->i_ino, (unsigned)
1979 le32_to_cpu(ni->type)); 1829 le32_to_cpu(ni->type));
1830 }
1980 break; 1831 break;
1981 } 1832 }
1982 if (lcn == LCN_HOLE) { 1833 if (lcn == LCN_HOLE) {
@@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1989 } 1840 }
1990 } 1841 }
1991 } 1842 }
1992 if (bytes > count) 1843 if (bytes > iov_iter_count(i))
1993 bytes = count; 1844 bytes = iov_iter_count(i);
1845again:
1994 /* 1846 /*
1995 * Bring in the user page(s) that we will copy from _first_. 1847 * Bring in the user page(s) that we will copy from _first_.
1996 * Otherwise there is a nasty deadlock on copying from the same 1848 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1999 * pages being swapped out between us bringing them into memory 1851 * pages being swapped out between us bringing them into memory
2000 * and doing the actual copying. 1852 * and doing the actual copying.
2001 */ 1853 */
2002 if (likely(nr_segs == 1)) 1854 if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
2003 ntfs_fault_in_pages_readable(buf, bytes); 1855 status = -EFAULT;
2004 else 1856 break;
2005 ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); 1857 }
2006 /* Get and lock @do_pages starting at index @start_idx. */ 1858 /* Get and lock @do_pages starting at index @start_idx. */
2007 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, 1859 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
2008 pages, &cached_page); 1860 pages, &cached_page);
@@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
2018 status = ntfs_prepare_pages_for_non_resident_write( 1870 status = ntfs_prepare_pages_for_non_resident_write(
2019 pages, do_pages, pos, bytes); 1871 pages, do_pages, pos, bytes);
2020 if (unlikely(status)) { 1872 if (unlikely(status)) {
2021 loff_t i_size;
2022
2023 do { 1873 do {
2024 unlock_page(pages[--do_pages]); 1874 unlock_page(pages[--do_pages]);
2025 page_cache_release(pages[do_pages]); 1875 page_cache_release(pages[do_pages]);
2026 } while (do_pages); 1876 } while (do_pages);
2027 /*
2028 * The write preparation may have instantiated
2029 * allocated space outside i_size. Trim this
2030 * off again. We can ignore any errors in this
2031 * case as we will just be waisting a bit of
2032 * allocated space, which is not a disaster.
2033 */
2034 i_size = i_size_read(vi);
2035 if (pos + bytes > i_size) {
2036 ntfs_write_failed(mapping, pos + bytes);
2037 }
2038 break; 1877 break;
2039 } 1878 }
2040 } 1879 }
2041 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; 1880 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
2042 if (likely(nr_segs == 1)) { 1881 copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
2043 copied = ntfs_copy_from_user(pages + u, do_pages - u, 1882 i, bytes);
2044 ofs, buf, bytes);
2045 buf += copied;
2046 } else
2047 copied = ntfs_copy_from_user_iovec(pages + u,
2048 do_pages - u, ofs, &iov, &iov_ofs,
2049 bytes);
2050 ntfs_flush_dcache_pages(pages + u, do_pages - u); 1883 ntfs_flush_dcache_pages(pages + u, do_pages - u);
2051 status = ntfs_commit_pages_after_write(pages, do_pages, pos, 1884 status = 0;
2052 bytes); 1885 if (likely(copied == bytes)) {
2053 if (likely(!status)) { 1886 status = ntfs_commit_pages_after_write(pages, do_pages,
2054 written += copied; 1887 pos, bytes);
2055 count -= copied; 1888 if (!status)
2056 pos += copied; 1889 status = bytes;
2057 if (unlikely(copied != bytes))
2058 status = -EFAULT;
2059 } 1890 }
2060 do { 1891 do {
2061 unlock_page(pages[--do_pages]); 1892 unlock_page(pages[--do_pages]);
2062 page_cache_release(pages[do_pages]); 1893 page_cache_release(pages[do_pages]);
2063 } while (do_pages); 1894 } while (do_pages);
2064 if (unlikely(status)) 1895 if (unlikely(status < 0))
2065 break; 1896 break;
2066 balance_dirty_pages_ratelimited(mapping); 1897 copied = status;
2067 cond_resched(); 1898 cond_resched();
2068 } while (count); 1899 if (unlikely(!copied)) {
2069err_out: 1900 size_t sc;
2070 *ppos = pos; 1901
1902 /*
1903 * We failed to copy anything. Fall back to single
1904 * segment length write.
1905 *
1906 * This is needed to avoid possible livelock in the
1907 * case that all segments in the iov cannot be copied
1908 * at once without a pagefault.
1909 */
1910 sc = iov_iter_single_seg_count(i);
1911 if (bytes > sc)
1912 bytes = sc;
1913 goto again;
1914 }
1915 iov_iter_advance(i, copied);
1916 pos += copied;
1917 written += copied;
1918 balance_dirty_pages_ratelimited(mapping);
1919 if (fatal_signal_pending(current)) {
1920 status = -EINTR;
1921 break;
1922 }
1923 } while (iov_iter_count(i));
2071 if (cached_page) 1924 if (cached_page)
2072 page_cache_release(cached_page); 1925 page_cache_release(cached_page);
2073 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 1926 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
@@ -2077,59 +1930,56 @@ err_out:
2077} 1930}
2078 1931
2079/** 1932/**
2080 * ntfs_file_aio_write_nolock - 1933 * ntfs_file_write_iter_nolock - write data to a file
1934 * @iocb: IO state structure (file, offset, etc.)
1935 * @from: iov_iter with data to write
1936 *
1937 * Basically the same as __generic_file_write_iter() except that it ends
1938 * up calling ntfs_perform_write() instead of generic_perform_write() and that
1939 * O_DIRECT is not implemented.
2081 */ 1940 */
2082static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, 1941static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
2083 const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) 1942 struct iov_iter *from)
2084{ 1943{
2085 struct file *file = iocb->ki_filp; 1944 struct file *file = iocb->ki_filp;
2086 struct address_space *mapping = file->f_mapping; 1945 loff_t pos = iocb->ki_pos;
2087 struct inode *inode = mapping->host; 1946 ssize_t written = 0;
2088 loff_t pos; 1947 ssize_t err;
2089 size_t count; /* after file limit checks */ 1948 size_t count = iov_iter_count(from);
2090 ssize_t written, err;
2091 1949
2092 count = iov_length(iov, nr_segs); 1950 err = ntfs_prepare_file_for_write(file, &pos, &count);
2093 pos = *ppos; 1951 if (count && !err) {
2094 /* We can write back this queue in page reclaim. */ 1952 iov_iter_truncate(from, count);
2095 current->backing_dev_info = inode_to_bdi(inode); 1953 written = ntfs_perform_write(file, from, pos);
2096 written = 0; 1954 if (likely(written >= 0))
2097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1955 iocb->ki_pos = pos + written;
2098 if (err) 1956 }
2099 goto out;
2100 if (!count)
2101 goto out;
2102 err = file_remove_suid(file);
2103 if (err)
2104 goto out;
2105 err = file_update_time(file);
2106 if (err)
2107 goto out;
2108 written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
2109 count);
2110out:
2111 current->backing_dev_info = NULL; 1957 current->backing_dev_info = NULL;
2112 return written ? written : err; 1958 return written ? written : err;
2113} 1959}
2114 1960
2115/** 1961/**
2116 * ntfs_file_aio_write - 1962 * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
1963 * @iocb: IO state structure
1964 * @from: iov_iter with data to write
1965 *
1966 * Basically the same as generic_file_write_iter() except that it ends up
1967 * calling ntfs_file_write_iter_nolock() instead of
1968 * __generic_file_write_iter().
2117 */ 1969 */
2118static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 1970static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2119 unsigned long nr_segs, loff_t pos)
2120{ 1971{
2121 struct file *file = iocb->ki_filp; 1972 struct file *file = iocb->ki_filp;
2122 struct address_space *mapping = file->f_mapping; 1973 struct inode *vi = file_inode(file);
2123 struct inode *inode = mapping->host;
2124 ssize_t ret; 1974 ssize_t ret;
2125 1975
2126 BUG_ON(iocb->ki_pos != pos); 1976 mutex_lock(&vi->i_mutex);
2127 1977 ret = ntfs_file_write_iter_nolock(iocb, from);
2128 mutex_lock(&inode->i_mutex); 1978 mutex_unlock(&vi->i_mutex);
2129 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2130 mutex_unlock(&inode->i_mutex);
2131 if (ret > 0) { 1979 if (ret > 0) {
2132 int err = generic_write_sync(file, iocb->ki_pos - ret, ret); 1980 ssize_t err;
1981
1982 err = generic_write_sync(file, iocb->ki_pos - ret, ret);
2133 if (err < 0) 1983 if (err < 0)
2134 ret = err; 1984 ret = err;
2135 } 1985 }
@@ -2197,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2197#endif /* NTFS_RW */ 2047#endif /* NTFS_RW */
2198 2048
2199const struct file_operations ntfs_file_ops = { 2049const struct file_operations ntfs_file_ops = {
2200 .llseek = generic_file_llseek, /* Seek inside file. */ 2050 .llseek = generic_file_llseek,
2201 .read = new_sync_read, /* Read from file. */ 2051 .read = new_sync_read,
2202 .read_iter = generic_file_read_iter, /* Async read from file. */ 2052 .read_iter = generic_file_read_iter,
2203#ifdef NTFS_RW 2053#ifdef NTFS_RW
2204 .write = do_sync_write, /* Write to file. */ 2054 .write = new_sync_write,
2205 .aio_write = ntfs_file_aio_write, /* Async write to file. */ 2055 .write_iter = ntfs_file_write_iter,
2206 /*.release = ,*/ /* Last file is closed. See 2056 .fsync = ntfs_file_fsync,
2207 fs/ext2/file.c::
2208 ext2_release_file() for
2209 how to use this to discard
2210 preallocated space for
2211 write opened files. */
2212 .fsync = ntfs_file_fsync, /* Sync a file to disk. */
2213 /*.aio_fsync = ,*/ /* Sync all outstanding async
2214 i/o operations on a
2215 kiocb. */
2216#endif /* NTFS_RW */ 2057#endif /* NTFS_RW */
2217 /*.ioctl = ,*/ /* Perform function on the 2058 .mmap = generic_file_mmap,
2218 mounted filesystem. */ 2059 .open = ntfs_file_open,
2219 .mmap = generic_file_mmap, /* Mmap file. */ 2060 .splice_read = generic_file_splice_read,
2220 .open = ntfs_file_open, /* Open file. */
2221 .splice_read = generic_file_splice_read /* Zero-copy data send with
2222 the data source being on
2223 the ntfs partition. We do
2224 not need to care about the
2225 data destination. */
2226 /*.sendpage = ,*/ /* Zero-copy data send with
2227 the data destination being
2228 on the ntfs partition. We
2229 do not need to care about
2230 the data source. */
2231}; 2061};
2232 2062
2233const struct inode_operations ntfs_file_inode_ops = { 2063const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b9949d363..1d0c21df0d80 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/aio.h>
32 31
33#include "aops.h" 32#include "aops.h"
34#include "attrib.h" 33#include "attrib.h"
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1b0463a92b17..8d2bc840c288 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/uio.h>
32 33
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
34 35
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155d54df..dd59599b022d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25#include <linux/aio.h> 25#include <linux/fs.h>
26 26
27handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 27handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
28 struct page *page, 28 struct page *page,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ba1790e52ff2..91f03ce98108 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2280 file->f_path.dentry->d_name.name, 2280 file->f_path.dentry->d_name.name,
2281 (unsigned int)from->nr_segs); /* GRRRRR */ 2281 (unsigned int)from->nr_segs); /* GRRRRR */
2282 2282
2283 if (iocb->ki_nbytes == 0) 2283 if (count == 0)
2284 return 0; 2284 return 0;
2285 2285
2286 appending = file->f_flags & O_APPEND ? 1 : 0; 2286 appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2330,8 +2330,7 @@ relock:
2330 } 2330 }
2331 2331
2332 can_do_direct = direct_io; 2332 can_do_direct = direct_io;
2333 ret = ocfs2_prepare_inode_for_write(file, ppos, 2333 ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
2334 iocb->ki_nbytes, appending,
2335 &can_do_direct, &has_refcount); 2334 &can_do_direct, &has_refcount);
2336 if (ret < 0) { 2335 if (ret < 0) {
2337 mlog_errno(ret); 2336 mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
2339 } 2338 }
2340 2339
2341 if (direct_io && !is_sync_kiocb(iocb)) 2340 if (direct_io && !is_sync_kiocb(iocb))
2342 unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, 2341 unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);
2343 *ppos);
2344 2342
2345 /* 2343 /*
2346 * We can't complete the direct I/O as requested, fall back to 2344 * We can't complete the direct I/O as requested, fall back to
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..6a83c47d5904 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
570 uid = make_kuid(current_user_ns(), user); 570 uid = make_kuid(current_user_ns(), user);
571 gid = make_kgid(current_user_ns(), group); 571 gid = make_kgid(current_user_ns(), group);
572 572
573retry_deleg:
573 newattrs.ia_valid = ATTR_CTIME; 574 newattrs.ia_valid = ATTR_CTIME;
574 if (user != (uid_t) -1) { 575 if (user != (uid_t) -1) {
575 if (!uid_valid(uid)) 576 if (!uid_valid(uid))
@@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
586 if (!S_ISDIR(inode->i_mode)) 587 if (!S_ISDIR(inode->i_mode))
587 newattrs.ia_valid |= 588 newattrs.ia_valid |=
588 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 589 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
589retry_deleg:
590 mutex_lock(&inode->i_mutex); 590 mutex_lock(&inode->i_mutex);
591 error = security_path_chown(path, uid, gid); 591 error = security_path_chown(path, uid, gid);
592 if (!error) 592 if (!error)
@@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
988 return ERR_PTR(err); 988 return ERR_PTR(err);
989 if (flags & O_CREAT) 989 if (flags & O_CREAT)
990 return ERR_PTR(-EINVAL); 990 return ERR_PTR(-EINVAL);
991 if (!filename && (flags & O_DIRECTORY))
992 if (!dentry->d_inode->i_op->lookup)
993 return ERR_PTR(-ENOTDIR);
994 return do_file_open_root(dentry, mnt, filename, &op); 991 return do_file_open_root(dentry, mnt, filename, &op);
995} 992}
996EXPORT_SYMBOL(file_open_root); 993EXPORT_SYMBOL(file_open_root);
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e58e2a6..2d084f2d0b83 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23#include <linux/fcntl.h> 23#include <linux/fcntl.h>
24#include <linux/aio.h>
25 24
26#include <asm/uaccess.h> 25#include <asm/uaccess.h>
27#include <asm/ioctls.h> 26#include <asm/ioctls.h>
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
539 mem_address = pdata->mem_address; 539 mem_address = pdata->mem_address;
540 record_size = pdata->record_size; 540 record_size = pdata->record_size;
541 dump_oops = pdata->dump_oops; 541 dump_oops = pdata->dump_oops;
542 ramoops_console_size = pdata->console_size;
543 ramoops_pmsg_size = pdata->pmsg_size;
544 ramoops_ftrace_size = pdata->ftrace_size;
542 545
543 pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", 546 pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
544 cxt->size, (unsigned long long)cxt->phys_addr, 547 cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b68786d66..69128b378646 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
9#include <linux/fcntl.h> 9#include <linux/fcntl.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/aio.h>
13#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
14#include <linux/security.h> 13#include <linux/security.h>
15#include <linux/export.h> 14#include <linux/export.h>
@@ -343,13 +342,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
343 342
344 init_sync_kiocb(&kiocb, file); 343 init_sync_kiocb(&kiocb, file);
345 kiocb.ki_pos = *ppos; 344 kiocb.ki_pos = *ppos;
346 kiocb.ki_nbytes = iov_iter_count(iter);
347 345
348 iter->type |= READ; 346 iter->type |= READ;
349 ret = file->f_op->read_iter(&kiocb, iter); 347 ret = file->f_op->read_iter(&kiocb, iter);
350 if (ret == -EIOCBQUEUED) 348 BUG_ON(ret == -EIOCBQUEUED);
351 ret = wait_on_sync_kiocb(&kiocb);
352
353 if (ret > 0) 349 if (ret > 0)
354 *ppos = kiocb.ki_pos; 350 *ppos = kiocb.ki_pos;
355 return ret; 351 return ret;
@@ -366,13 +362,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
366 362
367 init_sync_kiocb(&kiocb, file); 363 init_sync_kiocb(&kiocb, file);
368 kiocb.ki_pos = *ppos; 364 kiocb.ki_pos = *ppos;
369 kiocb.ki_nbytes = iov_iter_count(iter);
370 365
371 iter->type |= WRITE; 366 iter->type |= WRITE;
372 ret = file->f_op->write_iter(&kiocb, iter); 367 ret = file->f_op->write_iter(&kiocb, iter);
373 if (ret == -EIOCBQUEUED) 368 BUG_ON(ret == -EIOCBQUEUED);
374 ret = wait_on_sync_kiocb(&kiocb);
375
376 if (ret > 0) 369 if (ret > 0)
377 *ppos = kiocb.ki_pos; 370 *ppos = kiocb.ki_pos;
378 return ret; 371 return ret;
@@ -426,11 +419,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
426 419
427 init_sync_kiocb(&kiocb, filp); 420 init_sync_kiocb(&kiocb, filp);
428 kiocb.ki_pos = *ppos; 421 kiocb.ki_pos = *ppos;
429 kiocb.ki_nbytes = len;
430 422
431 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 423 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
432 if (-EIOCBQUEUED == ret) 424 BUG_ON(ret == -EIOCBQUEUED);
433 ret = wait_on_sync_kiocb(&kiocb);
434 *ppos = kiocb.ki_pos; 425 *ppos = kiocb.ki_pos;
435 return ret; 426 return ret;
436} 427}
@@ -446,12 +437,10 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
446 437
447 init_sync_kiocb(&kiocb, filp); 438 init_sync_kiocb(&kiocb, filp);
448 kiocb.ki_pos = *ppos; 439 kiocb.ki_pos = *ppos;
449 kiocb.ki_nbytes = len;
450 iov_iter_init(&iter, READ, &iov, 1, len); 440 iov_iter_init(&iter, READ, &iov, 1, len);
451 441
452 ret = filp->f_op->read_iter(&kiocb, &iter); 442 ret = filp->f_op->read_iter(&kiocb, &iter);
453 if (-EIOCBQUEUED == ret) 443 BUG_ON(ret == -EIOCBQUEUED);
454 ret = wait_on_sync_kiocb(&kiocb);
455 *ppos = kiocb.ki_pos; 444 *ppos = kiocb.ki_pos;
456 return ret; 445 return ret;
457} 446}
@@ -510,11 +499,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
510 499
511 init_sync_kiocb(&kiocb, filp); 500 init_sync_kiocb(&kiocb, filp);
512 kiocb.ki_pos = *ppos; 501 kiocb.ki_pos = *ppos;
513 kiocb.ki_nbytes = len;
514 502
515 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 503 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
516 if (-EIOCBQUEUED == ret) 504 BUG_ON(ret == -EIOCBQUEUED);
517 ret = wait_on_sync_kiocb(&kiocb);
518 *ppos = kiocb.ki_pos; 505 *ppos = kiocb.ki_pos;
519 return ret; 506 return ret;
520} 507}
@@ -530,12 +517,10 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
530 517
531 init_sync_kiocb(&kiocb, filp); 518 init_sync_kiocb(&kiocb, filp);
532 kiocb.ki_pos = *ppos; 519 kiocb.ki_pos = *ppos;
533 kiocb.ki_nbytes = len;
534 iov_iter_init(&iter, WRITE, &iov, 1, len); 520 iov_iter_init(&iter, WRITE, &iov, 1, len);
535 521
536 ret = filp->f_op->write_iter(&kiocb, &iter); 522 ret = filp->f_op->write_iter(&kiocb, &iter);
537 if (-EIOCBQUEUED == ret) 523 BUG_ON(ret == -EIOCBQUEUED);
538 ret = wait_on_sync_kiocb(&kiocb);
539 *ppos = kiocb.ki_pos; 524 *ppos = kiocb.ki_pos;
540 return ret; 525 return ret;
541} 526}
@@ -710,60 +695,47 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
710} 695}
711EXPORT_SYMBOL(iov_shorten); 696EXPORT_SYMBOL(iov_shorten);
712 697
713static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, 698static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
714 unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) 699 loff_t *ppos, iter_fn_t fn)
715{ 700{
716 struct kiocb kiocb; 701 struct kiocb kiocb;
717 struct iov_iter iter;
718 ssize_t ret; 702 ssize_t ret;
719 703
720 init_sync_kiocb(&kiocb, filp); 704 init_sync_kiocb(&kiocb, filp);
721 kiocb.ki_pos = *ppos; 705 kiocb.ki_pos = *ppos;
722 kiocb.ki_nbytes = len;
723 706
724 iov_iter_init(&iter, rw, iov, nr_segs, len); 707 ret = fn(&kiocb, iter);
725 ret = fn(&kiocb, &iter); 708 BUG_ON(ret == -EIOCBQUEUED);
726 if (ret == -EIOCBQUEUED)
727 ret = wait_on_sync_kiocb(&kiocb);
728 *ppos = kiocb.ki_pos; 709 *ppos = kiocb.ki_pos;
729 return ret; 710 return ret;
730} 711}
731 712
732static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 713static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter,
733 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 714 loff_t *ppos, iov_fn_t fn)
734{ 715{
735 struct kiocb kiocb; 716 struct kiocb kiocb;
736 ssize_t ret; 717 ssize_t ret;
737 718
738 init_sync_kiocb(&kiocb, filp); 719 init_sync_kiocb(&kiocb, filp);
739 kiocb.ki_pos = *ppos; 720 kiocb.ki_pos = *ppos;
740 kiocb.ki_nbytes = len;
741 721
742 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 722 ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos);
743 if (ret == -EIOCBQUEUED) 723 BUG_ON(ret == -EIOCBQUEUED);
744 ret = wait_on_sync_kiocb(&kiocb);
745 *ppos = kiocb.ki_pos; 724 *ppos = kiocb.ki_pos;
746 return ret; 725 return ret;
747} 726}
748 727
749/* Do it by hand, with file-ops */ 728/* Do it by hand, with file-ops */
750static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 729static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
751 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 730 loff_t *ppos, io_fn_t fn)
752{ 731{
753 struct iovec *vector = iov;
754 ssize_t ret = 0; 732 ssize_t ret = 0;
755 733
756 while (nr_segs > 0) { 734 while (iov_iter_count(iter)) {
757 void __user *base; 735 struct iovec iovec = iov_iter_iovec(iter);
758 size_t len;
759 ssize_t nr; 736 ssize_t nr;
760 737
761 base = vector->iov_base; 738 nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
762 len = vector->iov_len;
763 vector++;
764 nr_segs--;
765
766 nr = fn(filp, base, len, ppos);
767 739
768 if (nr < 0) { 740 if (nr < 0) {
769 if (!ret) 741 if (!ret)
@@ -771,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
771 break; 743 break;
772 } 744 }
773 ret += nr; 745 ret += nr;
774 if (nr != len) 746 if (nr != iovec.iov_len)
775 break; 747 break;
748 iov_iter_advance(iter, nr);
776 } 749 }
777 750
778 return ret; 751 return ret;
@@ -863,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file,
863 size_t tot_len; 836 size_t tot_len;
864 struct iovec iovstack[UIO_FASTIOV]; 837 struct iovec iovstack[UIO_FASTIOV];
865 struct iovec *iov = iovstack; 838 struct iovec *iov = iovstack;
839 struct iov_iter iter;
866 ssize_t ret; 840 ssize_t ret;
867 io_fn_t fn; 841 io_fn_t fn;
868 iov_fn_t fnv; 842 iov_fn_t fnv;
869 iter_fn_t iter_fn; 843 iter_fn_t iter_fn;
870 844
871 ret = rw_copy_check_uvector(type, uvector, nr_segs, 845 ret = import_iovec(type, uvector, nr_segs,
872 ARRAY_SIZE(iovstack), iovstack, &iov); 846 ARRAY_SIZE(iovstack), &iov, &iter);
873 if (ret <= 0) 847 if (ret < 0)
874 goto out; 848 return ret;
875 849
876 tot_len = ret; 850 tot_len = iov_iter_count(&iter);
851 if (!tot_len)
852 goto out;
877 ret = rw_verify_area(type, file, pos, tot_len); 853 ret = rw_verify_area(type, file, pos, tot_len);
878 if (ret < 0) 854 if (ret < 0)
879 goto out; 855 goto out;
@@ -891,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file,
891 } 867 }
892 868
893 if (iter_fn) 869 if (iter_fn)
894 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 870 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
895 pos, iter_fn);
896 else if (fnv) 871 else if (fnv)
897 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 872 ret = do_sync_readv_writev(file, &iter, pos, fnv);
898 pos, fnv);
899 else 873 else
900 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 874 ret = do_loop_readv_writev(file, &iter, pos, fn);
901 875
902 if (type != READ) 876 if (type != READ)
903 file_end_write(file); 877 file_end_write(file);
904 878
905out: 879out:
906 if (iov != iovstack) 880 kfree(iov);
907 kfree(iov);
908 if ((ret + (type == READ)) > 0) { 881 if ((ret + (type == READ)) > 0) {
909 if (type == READ) 882 if (type == READ)
910 fsnotify_access(file); 883 fsnotify_access(file);
@@ -1043,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1043 compat_ssize_t tot_len; 1016 compat_ssize_t tot_len;
1044 struct iovec iovstack[UIO_FASTIOV]; 1017 struct iovec iovstack[UIO_FASTIOV];
1045 struct iovec *iov = iovstack; 1018 struct iovec *iov = iovstack;
1019 struct iov_iter iter;
1046 ssize_t ret; 1020 ssize_t ret;
1047 io_fn_t fn; 1021 io_fn_t fn;
1048 iov_fn_t fnv; 1022 iov_fn_t fnv;
1049 iter_fn_t iter_fn; 1023 iter_fn_t iter_fn;
1050 1024
1051 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 1025 ret = compat_import_iovec(type, uvector, nr_segs,
1052 UIO_FASTIOV, iovstack, &iov); 1026 UIO_FASTIOV, &iov, &iter);
1053 if (ret <= 0) 1027 if (ret < 0)
1054 goto out; 1028 return ret;
1055 1029
1056 tot_len = ret; 1030 tot_len = iov_iter_count(&iter);
1031 if (!tot_len)
1032 goto out;
1057 ret = rw_verify_area(type, file, pos, tot_len); 1033 ret = rw_verify_area(type, file, pos, tot_len);
1058 if (ret < 0) 1034 if (ret < 0)
1059 goto out; 1035 goto out;
@@ -1071,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1071 } 1047 }
1072 1048
1073 if (iter_fn) 1049 if (iter_fn)
1074 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 1050 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
1075 pos, iter_fn);
1076 else if (fnv) 1051 else if (fnv)
1077 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 1052 ret = do_sync_readv_writev(file, &iter, pos, fnv);
1078 pos, fnv);
1079 else 1053 else
1080 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 1054 ret = do_loop_readv_writev(file, &iter, pos, fn);
1081 1055
1082 if (type != READ) 1056 if (type != READ)
1083 file_end_write(file); 1057 file_end_write(file);
1084 1058
1085out: 1059out:
1086 if (iov != iovstack) 1060 kfree(iov);
1087 kfree(iov);
1088 if ((ret + (type == READ)) > 0) { 1061 if ((ret + (type == READ)) > 0) {
1089 if (type == READ) 1062 if (type == READ)
1090 fsnotify_access(file); 1063 fsnotify_access(file);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e1f995..9312b7842e03 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/aio.h> 21#include <linux/uio.h>
22 22
23int reiserfs_commit_write(struct file *f, struct page *page, 23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
diff --git a/fs/splice.c b/fs/splice.c
index 7968da96bebb..41cbb16299e0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include "internal.h" 35#include "internal.h"
37 36
38/* 37/*
@@ -1534,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1534 struct iovec iovstack[UIO_FASTIOV]; 1533 struct iovec iovstack[UIO_FASTIOV];
1535 struct iovec *iov = iovstack; 1534 struct iovec *iov = iovstack;
1536 struct iov_iter iter; 1535 struct iov_iter iter;
1537 ssize_t count;
1538 1536
1539 pipe = get_pipe_info(file); 1537 pipe = get_pipe_info(file);
1540 if (!pipe) 1538 if (!pipe)
1541 return -EBADF; 1539 return -EBADF;
1542 1540
1543 ret = rw_copy_check_uvector(READ, uiov, nr_segs, 1541 ret = import_iovec(READ, uiov, nr_segs,
1544 ARRAY_SIZE(iovstack), iovstack, &iov); 1542 ARRAY_SIZE(iovstack), &iov, &iter);
1545 if (ret <= 0) 1543 if (ret < 0)
1546 goto out; 1544 return ret;
1547
1548 count = ret;
1549 iov_iter_init(&iter, READ, iov, nr_segs, count);
1550 1545
1546 sd.total_len = iov_iter_count(&iter);
1551 sd.len = 0; 1547 sd.len = 0;
1552 sd.total_len = count;
1553 sd.flags = flags; 1548 sd.flags = flags;
1554 sd.u.data = &iter; 1549 sd.u.data = &iter;
1555 sd.pos = 0; 1550 sd.pos = 0;
1556 1551
1557 pipe_lock(pipe); 1552 if (sd.total_len) {
1558 ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1553 pipe_lock(pipe);
1559 pipe_unlock(pipe); 1554 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
1560 1555 pipe_unlock(pipe);
1561out: 1556 }
1562 if (iov != iovstack)
1563 kfree(iov);
1564 1557
1558 kfree(iov);
1565 return ret; 1559 return ret;
1566} 1560}
1567 1561
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3cef9927..19636af5e75c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
66{ 66{
67 int retval; 67 int retval;
68 68
69 retval = security_inode_getattr(path->mnt, path->dentry); 69 retval = security_inode_getattr(path);
70 if (retval) 70 if (retval)
71 return retval; 71 return retval;
72 return vfs_getattr_nosec(path, stat); 72 return vfs_getattr_nosec(path, stat);
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
1tracefs-objs := inode.o
2
3obj-$(CONFIG_TRACING) += tracefs.o
4
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
1/*
2 * inode.c - part of tracefs, a pseudo file system for activating tracing
3 *
4 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
5 *
6 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * tracefs is the file system that is used by the tracing infrastructure.
13 *
14 */
15
16#include <linux/module.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/kobject.h>
20#include <linux/namei.h>
21#include <linux/tracefs.h>
22#include <linux/fsnotify.h>
23#include <linux/seq_file.h>
24#include <linux/parser.h>
25#include <linux/magic.h>
26#include <linux/slab.h>
27
28#define TRACEFS_DEFAULT_MODE 0700
29
30static struct vfsmount *tracefs_mount;
31static int tracefs_mount_count;
32static bool tracefs_registered;
33
34static ssize_t default_read_file(struct file *file, char __user *buf,
35 size_t count, loff_t *ppos)
36{
37 return 0;
38}
39
40static ssize_t default_write_file(struct file *file, const char __user *buf,
41 size_t count, loff_t *ppos)
42{
43 return count;
44}
45
46static const struct file_operations tracefs_file_operations = {
47 .read = default_read_file,
48 .write = default_write_file,
49 .open = simple_open,
50 .llseek = noop_llseek,
51};
52
53static struct tracefs_dir_ops {
54 int (*mkdir)(const char *name);
55 int (*rmdir)(const char *name);
56} tracefs_ops;
57
58static char *get_dname(struct dentry *dentry)
59{
60 const char *dname;
61 char *name;
62 int len = dentry->d_name.len;
63
64 dname = dentry->d_name.name;
65 name = kmalloc(len + 1, GFP_KERNEL);
66 if (!name)
67 return NULL;
68 memcpy(name, dname, len);
69 name[len] = 0;
70 return name;
71}
72
73static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
74{
75 char *name;
76 int ret;
77
78 name = get_dname(dentry);
79 if (!name)
80 return -ENOMEM;
81
82 /*
83 * The mkdir call can call the generic functions that create
84 * the files within the tracefs system. It is up to the individual
85 * mkdir routine to handle races.
86 */
87 mutex_unlock(&inode->i_mutex);
88 ret = tracefs_ops.mkdir(name);
89 mutex_lock(&inode->i_mutex);
90
91 kfree(name);
92
93 return ret;
94}
95
96static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
97{
98 char *name;
99 int ret;
100
101 name = get_dname(dentry);
102 if (!name)
103 return -ENOMEM;
104
105 /*
106 * The rmdir call can call the generic functions that create
107 * the files within the tracefs system. It is up to the individual
108 * rmdir routine to handle races.
109 * This time we need to unlock not only the parent (inode) but
110 * also the directory that is being deleted.
111 */
112 mutex_unlock(&inode->i_mutex);
113 mutex_unlock(&dentry->d_inode->i_mutex);
114
115 ret = tracefs_ops.rmdir(name);
116
117 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
118 mutex_lock(&dentry->d_inode->i_mutex);
119
120 kfree(name);
121
122 return ret;
123}
124
125static const struct inode_operations tracefs_dir_inode_operations = {
126 .lookup = simple_lookup,
127 .mkdir = tracefs_syscall_mkdir,
128 .rmdir = tracefs_syscall_rmdir,
129};
130
131static struct inode *tracefs_get_inode(struct super_block *sb)
132{
133 struct inode *inode = new_inode(sb);
134 if (inode) {
135 inode->i_ino = get_next_ino();
136 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
137 }
138 return inode;
139}
140
141struct tracefs_mount_opts {
142 kuid_t uid;
143 kgid_t gid;
144 umode_t mode;
145};
146
147enum {
148 Opt_uid,
149 Opt_gid,
150 Opt_mode,
151 Opt_err
152};
153
154static const match_table_t tokens = {
155 {Opt_uid, "uid=%u"},
156 {Opt_gid, "gid=%u"},
157 {Opt_mode, "mode=%o"},
158 {Opt_err, NULL}
159};
160
161struct tracefs_fs_info {
162 struct tracefs_mount_opts mount_opts;
163};
164
165static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
166{
167 substring_t args[MAX_OPT_ARGS];
168 int option;
169 int token;
170 kuid_t uid;
171 kgid_t gid;
172 char *p;
173
174 opts->mode = TRACEFS_DEFAULT_MODE;
175
176 while ((p = strsep(&data, ",")) != NULL) {
177 if (!*p)
178 continue;
179
180 token = match_token(p, tokens, args);
181 switch (token) {
182 case Opt_uid:
183 if (match_int(&args[0], &option))
184 return -EINVAL;
185 uid = make_kuid(current_user_ns(), option);
186 if (!uid_valid(uid))
187 return -EINVAL;
188 opts->uid = uid;
189 break;
190 case Opt_gid:
191 if (match_int(&args[0], &option))
192 return -EINVAL;
193 gid = make_kgid(current_user_ns(), option);
194 if (!gid_valid(gid))
195 return -EINVAL;
196 opts->gid = gid;
197 break;
198 case Opt_mode:
199 if (match_octal(&args[0], &option))
200 return -EINVAL;
201 opts->mode = option & S_IALLUGO;
202 break;
203 /*
204 * We might like to report bad mount options here;
205 * but traditionally tracefs has ignored all mount options
206 */
207 }
208 }
209
210 return 0;
211}
212
213static int tracefs_apply_options(struct super_block *sb)
214{
215 struct tracefs_fs_info *fsi = sb->s_fs_info;
216 struct inode *inode = sb->s_root->d_inode;
217 struct tracefs_mount_opts *opts = &fsi->mount_opts;
218
219 inode->i_mode &= ~S_IALLUGO;
220 inode->i_mode |= opts->mode;
221
222 inode->i_uid = opts->uid;
223 inode->i_gid = opts->gid;
224
225 return 0;
226}
227
228static int tracefs_remount(struct super_block *sb, int *flags, char *data)
229{
230 int err;
231 struct tracefs_fs_info *fsi = sb->s_fs_info;
232
233 sync_filesystem(sb);
234 err = tracefs_parse_options(data, &fsi->mount_opts);
235 if (err)
236 goto fail;
237
238 tracefs_apply_options(sb);
239
240fail:
241 return err;
242}
243
244static int tracefs_show_options(struct seq_file *m, struct dentry *root)
245{
246 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
247 struct tracefs_mount_opts *opts = &fsi->mount_opts;
248
249 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
250 seq_printf(m, ",uid=%u",
251 from_kuid_munged(&init_user_ns, opts->uid));
252 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
253 seq_printf(m, ",gid=%u",
254 from_kgid_munged(&init_user_ns, opts->gid));
255 if (opts->mode != TRACEFS_DEFAULT_MODE)
256 seq_printf(m, ",mode=%o", opts->mode);
257
258 return 0;
259}
260
261static const struct super_operations tracefs_super_operations = {
262 .statfs = simple_statfs,
263 .remount_fs = tracefs_remount,
264 .show_options = tracefs_show_options,
265};
266
267static int trace_fill_super(struct super_block *sb, void *data, int silent)
268{
269 static struct tree_descr trace_files[] = {{""}};
270 struct tracefs_fs_info *fsi;
271 int err;
272
273 save_mount_options(sb, data);
274
275 fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
276 sb->s_fs_info = fsi;
277 if (!fsi) {
278 err = -ENOMEM;
279 goto fail;
280 }
281
282 err = tracefs_parse_options(data, &fsi->mount_opts);
283 if (err)
284 goto fail;
285
286 err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
287 if (err)
288 goto fail;
289
290 sb->s_op = &tracefs_super_operations;
291
292 tracefs_apply_options(sb);
293
294 return 0;
295
296fail:
297 kfree(fsi);
298 sb->s_fs_info = NULL;
299 return err;
300}
301
302static struct dentry *trace_mount(struct file_system_type *fs_type,
303 int flags, const char *dev_name,
304 void *data)
305{
306 return mount_single(fs_type, flags, data, trace_fill_super);
307}
308
309static struct file_system_type trace_fs_type = {
310 .owner = THIS_MODULE,
311 .name = "tracefs",
312 .mount = trace_mount,
313 .kill_sb = kill_litter_super,
314};
315MODULE_ALIAS_FS("tracefs");
316
317static struct dentry *start_creating(const char *name, struct dentry *parent)
318{
319 struct dentry *dentry;
320 int error;
321
322 pr_debug("tracefs: creating file '%s'\n",name);
323
324 error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
325 &tracefs_mount_count);
326 if (error)
327 return ERR_PTR(error);
328
329 /* If the parent is not specified, we create it in the root.
330 * We need the root dentry to do this, which is in the super
331 * block. A pointer to that is in the struct vfsmount that we
332 * have around.
333 */
334 if (!parent)
335 parent = tracefs_mount->mnt_root;
336
337 mutex_lock(&parent->d_inode->i_mutex);
338 dentry = lookup_one_len(name, parent, strlen(name));
339 if (!IS_ERR(dentry) && dentry->d_inode) {
340 dput(dentry);
341 dentry = ERR_PTR(-EEXIST);
342 }
343 if (IS_ERR(dentry))
344 mutex_unlock(&parent->d_inode->i_mutex);
345 return dentry;
346}
347
348static struct dentry *failed_creating(struct dentry *dentry)
349{
350 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
351 dput(dentry);
352 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
353 return NULL;
354}
355
356static struct dentry *end_creating(struct dentry *dentry)
357{
358 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
359 return dentry;
360}
361
362/**
363 * tracefs_create_file - create a file in the tracefs filesystem
364 * @name: a pointer to a string containing the name of the file to create.
365 * @mode: the permission that the file should have.
366 * @parent: a pointer to the parent dentry for this file. This should be a
367 * directory dentry if set. If this parameter is NULL, then the
368 * file will be created in the root of the tracefs filesystem.
369 * @data: a pointer to something that the caller will want to get to later
370 * on. The inode.i_private pointer will point to this value on
371 * the open() call.
372 * @fops: a pointer to a struct file_operations that should be used for
373 * this file.
374 *
375 * This is the basic "create a file" function for tracefs. It allows for a
376 * wide range of flexibility in creating a file, or a directory (if you want
377 * to create a directory, the tracefs_create_dir() function is
378 * recommended to be used instead.)
379 *
380 * This function will return a pointer to a dentry if it succeeds. This
381 * pointer must be passed to the tracefs_remove() function when the file is
382 * to be removed (no automatic cleanup happens if your module is unloaded,
383 * you are responsible here.) If an error occurs, %NULL will be returned.
384 *
385 * If tracefs is not enabled in the kernel, the value -%ENODEV will be
386 * returned.
387 */
388struct dentry *tracefs_create_file(const char *name, umode_t mode,
389 struct dentry *parent, void *data,
390 const struct file_operations *fops)
391{
392 struct dentry *dentry;
393 struct inode *inode;
394
395 if (!(mode & S_IFMT))
396 mode |= S_IFREG;
397 BUG_ON(!S_ISREG(mode));
398 dentry = start_creating(name, parent);
399
400 if (IS_ERR(dentry))
401 return NULL;
402
403 inode = tracefs_get_inode(dentry->d_sb);
404 if (unlikely(!inode))
405 return failed_creating(dentry);
406
407 inode->i_mode = mode;
408 inode->i_fop = fops ? fops : &tracefs_file_operations;
409 inode->i_private = data;
410 d_instantiate(dentry, inode);
411 fsnotify_create(dentry->d_parent->d_inode, dentry);
412 return end_creating(dentry);
413}
414
415static struct dentry *__create_dir(const char *name, struct dentry *parent,
416 const struct inode_operations *ops)
417{
418 struct dentry *dentry = start_creating(name, parent);
419 struct inode *inode;
420
421 if (IS_ERR(dentry))
422 return NULL;
423
424 inode = tracefs_get_inode(dentry->d_sb);
425 if (unlikely(!inode))
426 return failed_creating(dentry);
427
428 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
429 inode->i_op = ops;
430 inode->i_fop = &simple_dir_operations;
431
432 /* directory inodes start off with i_nlink == 2 (for "." entry) */
433 inc_nlink(inode);
434 d_instantiate(dentry, inode);
435 inc_nlink(dentry->d_parent->d_inode);
436 fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
437 return end_creating(dentry);
438}
439
440/**
441 * tracefs_create_dir - create a directory in the tracefs filesystem
442 * @name: a pointer to a string containing the name of the directory to
443 * create.
444 * @parent: a pointer to the parent dentry for this file. This should be a
445 * directory dentry if set. If this parameter is NULL, then the
446 * directory will be created in the root of the tracefs filesystem.
447 *
448 * This function creates a directory in tracefs with the given name.
449 *
450 * This function will return a pointer to a dentry if it succeeds. This
451 * pointer must be passed to the tracefs_remove() function when the file is
452 * to be removed. If an error occurs, %NULL will be returned.
453 *
454 * If tracing is not enabled in the kernel, the value -%ENODEV will be
455 * returned.
456 */
457struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
458{
459 return __create_dir(name, parent, &simple_dir_inode_operations);
460}
461
462/**
463 * tracefs_create_instance_dir - create the tracing instances directory
464 * @name: The name of the instances directory to create
465 * @parent: The parent directory that the instances directory will exist
466 * @mkdir: The function to call when a mkdir is performed.
467 * @rmdir: The function to call when a rmdir is performed.
468 *
469 * Only one instances directory is allowed.
470 *
471 * The instances directory is special as it allows for mkdir and rmdir to
472 * to be done by userspace. When a mkdir or rmdir is performed, the inode
473 * locks are released and the methhods passed in (@mkdir and @rmdir) are
474 * called without locks and with the name of the directory being created
475 * within the instances directory.
476 *
477 * Returns the dentry of the instances directory.
478 */
479struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
480 int (*mkdir)(const char *name),
481 int (*rmdir)(const char *name))
482{
483 struct dentry *dentry;
484
485 /* Only allow one instance of the instances directory. */
486 if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
487 return NULL;
488
489 dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
490 if (!dentry)
491 return NULL;
492
493 tracefs_ops.mkdir = mkdir;
494 tracefs_ops.rmdir = rmdir;
495
496 return dentry;
497}
498
499static inline int tracefs_positive(struct dentry *dentry)
500{
501 return dentry->d_inode && !d_unhashed(dentry);
502}
503
504static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
505{
506 int ret = 0;
507
508 if (tracefs_positive(dentry)) {
509 if (dentry->d_inode) {
510 dget(dentry);
511 switch (dentry->d_inode->i_mode & S_IFMT) {
512 case S_IFDIR:
513 ret = simple_rmdir(parent->d_inode, dentry);
514 break;
515 default:
516 simple_unlink(parent->d_inode, dentry);
517 break;
518 }
519 if (!ret)
520 d_delete(dentry);
521 dput(dentry);
522 }
523 }
524 return ret;
525}
526
527/**
528 * tracefs_remove - removes a file or directory from the tracefs filesystem
529 * @dentry: a pointer to a the dentry of the file or directory to be
530 * removed.
531 *
532 * This function removes a file or directory in tracefs that was previously
533 * created with a call to another tracefs function (like
534 * tracefs_create_file() or variants thereof.)
535 */
536void tracefs_remove(struct dentry *dentry)
537{
538 struct dentry *parent;
539 int ret;
540
541 if (IS_ERR_OR_NULL(dentry))
542 return;
543
544 parent = dentry->d_parent;
545 if (!parent || !parent->d_inode)
546 return;
547
548 mutex_lock(&parent->d_inode->i_mutex);
549 ret = __tracefs_remove(dentry, parent);
550 mutex_unlock(&parent->d_inode->i_mutex);
551 if (!ret)
552 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
553}
554
555/**
556 * tracefs_remove_recursive - recursively removes a directory
557 * @dentry: a pointer to a the dentry of the directory to be removed.
558 *
559 * This function recursively removes a directory tree in tracefs that
560 * was previously created with a call to another tracefs function
561 * (like tracefs_create_file() or variants thereof.)
562 */
563void tracefs_remove_recursive(struct dentry *dentry)
564{
565 struct dentry *child, *parent;
566
567 if (IS_ERR_OR_NULL(dentry))
568 return;
569
570 parent = dentry->d_parent;
571 if (!parent || !parent->d_inode)
572 return;
573
574 parent = dentry;
575 down:
576 mutex_lock(&parent->d_inode->i_mutex);
577 loop:
578 /*
579 * The parent->d_subdirs is protected by the d_lock. Outside that
580 * lock, the child can be unlinked and set to be freed which can
581 * use the d_u.d_child as the rcu head and corrupt this list.
582 */
583 spin_lock(&parent->d_lock);
584 list_for_each_entry(child, &parent->d_subdirs, d_child) {
585 if (!tracefs_positive(child))
586 continue;
587
588 /* perhaps simple_empty(child) makes more sense */
589 if (!list_empty(&child->d_subdirs)) {
590 spin_unlock(&parent->d_lock);
591 mutex_unlock(&parent->d_inode->i_mutex);
592 parent = child;
593 goto down;
594 }
595
596 spin_unlock(&parent->d_lock);
597
598 if (!__tracefs_remove(child, parent))
599 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
600
601 /*
602 * The parent->d_lock protects agaist child from unlinking
603 * from d_subdirs. When releasing the parent->d_lock we can
604 * no longer trust that the next pointer is valid.
605 * Restart the loop. We'll skip this one with the
606 * tracefs_positive() check.
607 */
608 goto loop;
609 }
610 spin_unlock(&parent->d_lock);
611
612 mutex_unlock(&parent->d_inode->i_mutex);
613 child = parent;
614 parent = parent->d_parent;
615 mutex_lock(&parent->d_inode->i_mutex);
616
617 if (child != dentry)
618 /* go up */
619 goto loop;
620
621 if (!__tracefs_remove(child, parent))
622 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
623 mutex_unlock(&parent->d_inode->i_mutex);
624}
625
626/**
627 * tracefs_initialized - Tells whether tracefs has been registered
628 */
629bool tracefs_initialized(void)
630{
631 return tracefs_registered;
632}
633
634static struct kobject *trace_kobj;
635
636static int __init tracefs_init(void)
637{
638 int retval;
639
640 trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
641 if (!trace_kobj)
642 return -EINVAL;
643
644 retval = register_filesystem(&trace_fs_type);
645 if (!retval)
646 tracefs_registered = true;
647
648 return retval;
649}
650core_initcall(tracefs_init);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0acf626..c3d15fe83403 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
50 */ 50 */
51 51
52#include "ubifs.h" 52#include "ubifs.h"
53#include <linux/aio.h>
54#include <linux/mount.h> 53#include <linux/mount.h>
55#include <linux/namei.h> 54#include <linux/namei.h>
56#include <linux/slab.h> 55#include <linux/slab.h>
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555fbeac..7f885cc8b0b7 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,7 +34,7 @@
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/buffer_head.h> 36#include <linux/buffer_head.h>
37#include <linux/aio.h> 37#include <linux/uio.h>
38 38
39#include "udf_i.h" 39#include "udf_i.h"
40#include "udf_sb.h" 40#include "udf_sb.h"
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
122 struct file *file = iocb->ki_filp; 122 struct file *file = iocb->ki_filp;
123 struct inode *inode = file_inode(file); 123 struct inode *inode = file_inode(file);
124 int err, pos; 124 int err, pos;
125 size_t count = iocb->ki_nbytes; 125 size_t count = iov_iter_count(from);
126 struct udf_inode_info *iinfo = UDF_I(inode); 126 struct udf_inode_info *iinfo = UDF_I(inode);
127 127
128 mutex_lock(&inode->i_mutex); 128 mutex_lock(&inode->i_mutex);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d599098d..9c1fbd23913d 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,7 +38,7 @@
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 39#include <linux/crc-itu-t.h>
40#include <linux/mpage.h> 40#include <linux/mpage.h>
41#include <linux/aio.h> 41#include <linux/uio.h>
42 42
43#include "udf_i.h" 43#include "udf_i.h"
44#include "udf_sb.h" 44#include "udf_sb.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..4f8cdc59bc38 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
31#include "xfs_bmap.h" 31#include "xfs_bmap.h"
32#include "xfs_bmap_util.h" 32#include "xfs_bmap_util.h"
33#include "xfs_bmap_btree.h" 33#include "xfs_bmap_btree.h"
34#include <linux/aio.h>
35#include <linux/gfp.h> 34#include <linux/gfp.h>
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/pagevec.h> 36#include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a2e1cb8a568b..f44212fae653 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
38#include "xfs_icache.h" 38#include "xfs_icache.h"
39#include "xfs_pnfs.h" 39#include "xfs_pnfs.h"
40 40
41#include <linux/aio.h>
42#include <linux/dcache.h> 41#include <linux/dcache.h>
43#include <linux/falloc.h> 42#include <linux/falloc.h>
44#include <linux/pagevec.h> 43#include <linux/pagevec.h>