author		Al Viro <viro@zeniv.linux.org.uk>	2015-04-11 22:24:41 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2015-04-11 22:24:41 -0400
commit		c0fec3a98bd6c4d992f191ee1aa0b3599213f3d4 (patch)
tree		24b4533146ce10bcf4af6fca0971311ece523d58 /fs/aio.c
parent		c1b8940b42bb6487b10f2267a96b486276ce9ff7 (diff)
parent		e2e40f2c1ed433c5e224525c8c862fd32e5d3df2 (diff)

Merge branch 'iocb' into for-next

Diffstat (limited to 'fs/aio.c')
-rw-r--r--	fs/aio.c	150
1 file changed, 83 insertions(+), 67 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1854c1..435ca29eca31 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@ struct kioctx {
 	unsigned		id;
 };
 
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED		((void *) (~0ULL))
+
+struct aio_kiocb {
+	struct kiocb		common;
+
+	struct kioctx		*ki_ctx;
+	kiocb_cancel_fn		*ki_cancel;
+
+	struct iocb __user	*ki_user_iocb;	/* user's aiocb */
+	__u64			ki_user_data;	/* user's data for completion */
+
+	struct list_head	ki_list;	/* the aio core uses this
+						 * for cancellation */
+
+	/*
+	 * If the aio_resfd field of the userspace iocb is not zero,
+	 * this is the underlying eventfd context to deliver events to.
+	 */
+	struct eventfd_ctx	*ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
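
The comment added above describes a lock-free claim protocol: completion and
cancellation race to be first to swap ki_cancel to the KIOCB_CANCELLED
sentinel, so exactly one side ever acts on a given request. Below is a
minimal userspace sketch of that protocol, with C11 atomics standing in for
the kernel's cmpxchg(); every name in it is illustrative, not from the patch.

#include <stdatomic.h>
#include <stdio.h>

#define CANCELLED ((void *) (~0ULL))	/* sentinel, as in KIOCB_CANCELLED */

typedef int (cancel_fn)(void *req);

struct req {
	_Atomic(cancel_fn *) cancel;	/* NULL, a callback, or CANCELLED */
};

static int my_cancel(void *req) { puts("cancel callback ran"); return 0; }

/* Completion side: claim the slot with one atomic swap; only the winner
 * of the race against do_cancel() proceeds to deliver the event. */
static int try_complete(struct req *r)
{
	cancel_fn *old = atomic_exchange(&r->cancel, (cancel_fn *)CANCELLED);
	return old != (cancel_fn *)CANCELLED;	/* 0: cancellation beat us */
}

/* Cancellation side, mirroring kiocb_cancel(): loop until we install the
 * sentinel, then invoke whatever callback was there before us. */
static int do_cancel(struct req *r)
{
	cancel_fn *old, *cur = atomic_load(&r->cancel);
	do {
		old = cur;
		if (!old || old == (cancel_fn *)CANCELLED)
			return -1;	/* not cancellable, or already done */
	} while (!atomic_compare_exchange_weak(&r->cancel, &cur,
					       (cancel_fn *)CANCELLED));
	return old(r);
}

int main(void)
{
	struct req r = { .cancel = my_cancel };
	printf("cancel: %d\n", do_cancel(&r));		/* runs my_cancel */
	printf("complete won: %d\n", try_complete(&r));	/* 0: too late */
	return 0;
}

Whichever side loses the race simply observes the sentinel and backs off,
which is what lets completion stay lockless in the common case.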
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
 
-	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
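
kiocb_set_cancel_fn() now receives the generic struct kiocb that lives inside
struct aio_kiocb as its common member, and recovers the containing request
with container_of(). A self-contained sketch of that embed-and-recover
pattern follows; the types are stand-ins, not the kernel's.

#include <stddef.h>
#include <stdio.h>

/* Recover a pointer to the enclosing struct from a pointer to one of its
 * members, the same arithmetic the kernel macro performs. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct common { int flags; };

struct wrapper {
	int extra;
	struct common common;	/* embedded, like aio_kiocb.common */
};

int main(void)
{
	struct wrapper w = { .extra = 42 };
	struct common *c = &w.common;	/* what generic code passes around */
	struct wrapper *back = container_of(c, struct wrapper, common);
	printf("%d\n", back->extra);	/* 42 */
	return 0;
}

Embedding the generic object keeps the aio-private state invisible to code
that only handles struct kiocb, while one subtraction gets it back.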
@@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
 		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
 	} while (cancel != old);
 
-	return cancel(kiocb);
+	return cancel(&kiocb->common);
 }
 
 static void free_ioctx(struct work_struct *work)
@@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
 
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
-				       struct kiocb, ki_list);
+				       struct aio_kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
 		kiocb_cancel(req);
@@ -778,22 +811,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	return 0;
 }
 
-/* wait_on_sync_kiocb:
- *	Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-	while (!req->ki_ctx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (req->ki_ctx)
-			break;
-		io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
  * exit_aio: called when the last user of mm goes away. At this point, there is
  * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -948,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
  * Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	if (!get_reqs_available(ctx)) {
 		user_refill_reqs_available(ctx);
@@ -971,10 +988,10 @@ out_put:
 	return NULL;
 }
 
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-	if (req->ki_filp)
-		fput(req->ki_filp);
+	if (req->common.ki_filp)
+		fput(req->common.ki_filp);
 	if (req->ki_eventfd != NULL)
 		eventfd_ctx_put(req->ki_eventfd);
 	kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1027,9 @@ out:
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
@@ -1025,13 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	 *    ref, no other paths have a way to get another ref
 	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
-	if (is_sync_kiocb(iocb)) {
-		iocb->ki_user_data = res;
-		smp_wmb();
-		iocb->ki_ctx = ERR_PTR(-EXDEV);
-		wake_up_process(iocb->ki_obj.tsk);
-		return;
-	}
+	BUG_ON(is_sync_kiocb(kiocb));
 
 	if (iocb->ki_list.next) {
 		unsigned long	flags;
@@ -1057,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
 	event->data = iocb->ki_user_data;
 	event->res = res;
 	event->res2 = res2;
@@ -1066,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
 	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
 		 res, res2);
 
 	/* after flagging the request as done, we
@@ -1113,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_events_ring
  *	Pull an event off of the ioctx's event ring. Returns the number of
@@ -1344,12 +1355,13 @@ typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 				     int rw, char __user *buf,
 				     unsigned long *nr_segs,
+				     size_t *len,
 				     struct iovec **iovec,
 				     bool compat)
 {
 	ssize_t ret;
 
-	*nr_segs = kiocb->ki_nbytes;
+	*nr_segs = *len;
 
 #ifdef CONFIG_COMPAT
 	if (compat)
@@ -1364,21 +1376,22 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 	if (ret < 0)
 		return ret;
 
-	/* ki_nbytes now reflect bytes instead of segs */
-	kiocb->ki_nbytes = ret;
+	/* len now reflect bytes instead of segs */
+	*len = ret;
 	return 0;
 }
 
 static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
 				       int rw, char __user *buf,
 				       unsigned long *nr_segs,
+				       size_t len,
 				       struct iovec *iovec)
 {
-	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
+	if (unlikely(!access_ok(!rw, buf, len)))
 		return -EFAULT;
 
 	iovec->iov_base = buf;
-	iovec->iov_len = kiocb->ki_nbytes;
+	iovec->iov_len = len;
 	*nr_segs = 1;
 	return 0;
 }
@@ -1388,7 +1401,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
  *	Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-			    char __user *buf, bool compat)
+			    char __user *buf, size_t len, bool compat)
 {
 	struct file *file = req->ki_filp;
 	ssize_t ret;
@@ -1423,21 +1436,21 @@ rw_common:
 		if (!rw_op && !iter_op)
 			return -EINVAL;
 
-		ret = (opcode == IOCB_CMD_PREADV ||
-		       opcode == IOCB_CMD_PWRITEV)
-			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-						&iovec, compat)
-			: aio_setup_single_vector(req, rw, buf, &nr_segs,
-						  iovec);
+		if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+			ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs,
+						    &len, &iovec, compat);
+		else
+			ret = aio_setup_single_vector(req, rw, buf, &nr_segs,
+						      len, iovec);
 		if (!ret)
-			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+			ret = rw_verify_area(rw, file, &req->ki_pos, len);
 		if (ret < 0) {
 			if (iovec != inline_vecs)
 				kfree(iovec);
 			return ret;
 		}
 
-		req->ki_nbytes = ret;
+		len = ret;
 
 		/* XXX: move/kill - rw_verify_area()? */
 		/* This matches the pread()/pwrite() logic */
@@ -1450,7 +1463,7 @@ rw_common:
 		file_start_write(file);
 
 		if (iter_op) {
-			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+			iov_iter_init(&iter, rw, iovec, nr_segs, len);
 			ret = iter_op(req, &iter);
 		} else {
 			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
@@ -1500,7 +1513,7 @@ rw_common:
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 struct iocb *iocb, bool compat)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
@@ -1523,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->ki_filp = fget(iocb->aio_fildes);
-	if (unlikely(!req->ki_filp)) {
+	req->common.ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->common.ki_filp)) {
 		ret = -EBADF;
 		goto out_put_req;
 	}
+	req->common.ki_pos = iocb->aio_offset;
+	req->common.ki_complete = aio_complete;
+	req->common.ki_flags = 0;
 
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@@ -1542,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
+
+		req->common.ki_flags |= IOCB_EVENTFD;
 	}
 
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
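
Together with ki_pos and ki_flags above, the submission path now wires
completion through the new ki_complete hook on the generic kiocb instead of
relying on an exported aio_complete(); in this scheme a NULL hook marks a
synchronous request. A toy sketch of that dispatch, assuming the lower layer
signals "result delivered via callback" with a sentinel return in the style
of -EIOCBQUEUED (all names illustrative):

#include <stdio.h>

struct kiocb_like {
	long pos;
	void (*complete)(struct kiocb_like *req, long res, long res2);
};

static void aio_done(struct kiocb_like *req, long res, long res2)
{
	printf("async completion: res=%ld\n", res);
}

static long do_io(struct kiocb_like *req, long nbytes)
{
	long res = nbytes;		/* pretend the transfer succeeded */
	if (req->complete) {		/* async: report through the hook */
		req->complete(req, res, 0);
		return -1;		/* "queued", result comes via hook */
	}
	return res;			/* sync: return the result directly */
}

int main(void)
{
	struct kiocb_like sync_req = { 0, NULL };
	struct kiocb_like async_req = { 0, aio_done };

	printf("sync: %ld\n", do_io(&sync_req, 512));
	do_io(&async_req, 512);
	return 0;
}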
@@ -1550,13 +1568,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	}
 
-	req->ki_obj.user = user_iocb;
+	req->ki_user_iocb = user_iocb;
 	req->ki_user_data = iocb->aio_data;
-	req->ki_pos = iocb->aio_offset;
-	req->ki_nbytes = iocb->aio_nbytes;
 
-	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+	ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
 			   (char __user *)(unsigned long)iocb->aio_buf,
+			   iocb->aio_nbytes,
 			   compat);
 	if (ret)
 		goto out_put_req;
@@ -1643,10 +1660,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-				  u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-	struct list_head *pos;
+	struct aio_kiocb *kiocb;
 
 	assert_spin_locked(&ctx->ctx_lock);
 
@@ -1654,9 +1671,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
 		return NULL;
 
 	/* TODO: use a hash or array, this sucks. */
-	list_for_each(pos, &ctx->active_reqs) {
-		struct kiocb *kiocb = list_kiocb(pos);
-		if (kiocb->ki_obj.user == iocb)
+	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+		if (kiocb->ki_user_iocb == iocb)
 			return kiocb;
 	}
 	return NULL;
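
lookup_kiocb() also drops the open-coded list_head walk and the list_kiocb()
helper in favour of list_for_each_entry(), which yields the containing
aio_kiocb directly - container_of() applied at every node. A plain-C sketch
of the same iteration follows; the kernel macro infers the entry type with
typeof(), which is passed explicitly here.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct list_head { struct list_head *next, *prev; };

/* Walk the entries that embed a list_head, as list_for_each_entry() does. */
#define list_for_each_entry(pos, head, member, type)		\
	for (pos = container_of((head)->next, type, member);	\
	     &pos->member != (head);				\
	     pos = container_of(pos->member.next, type, member))

struct item {
	int id;
	struct list_head node;
};

int main(void)
{
	struct list_head head;
	struct item a = { 1 }, b = { 2 };
	struct item *it;

	/* hand-built ring: head -> a -> b -> head (prev links unused here) */
	head.next = &a.node;
	a.node.next = &b.node;
	b.node.next = &head;

	list_for_each_entry(it, &head, node, struct item)
		printf("%d\n", it->id);	/* prints 1, then 2 */
	return 0;
}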
@@ -1676,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		struct io_event __user *, result)
 {
 	struct kioctx *ctx;
-	struct kiocb *kiocb;
+	struct aio_kiocb *kiocb;
 	u32 key;
 	int ret;
 