diff options
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 94 |
1 files changed, 65 insertions, 29 deletions
@@ -151,6 +151,38 @@ struct kioctx { | |||
151 | unsigned id; | 151 | unsigned id; |
152 | }; | 152 | }; |
153 | 153 | ||
154 | /* | ||
155 | * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either | ||
156 | * cancelled or completed (this makes a certain amount of sense because | ||
157 | * successful cancellation - io_cancel() - does deliver the completion to | ||
158 | * userspace). | ||
159 | * | ||
160 | * And since most things don't implement kiocb cancellation and we'd really like | ||
161 | * kiocb completion to be lockless when possible, we use ki_cancel to | ||
162 | * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED | ||
163 | * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). | ||
164 | */ | ||
165 | #define KIOCB_CANCELLED ((void *) (~0ULL)) | ||
166 | |||
167 | struct aio_kiocb { /* aio-private request state wrapped around a generic struct kiocb */ | ||
168 | struct kiocb common; /* generic kiocb; container_of() on this member recovers the aio_kiocb */ | ||
169 | |||
170 | struct kioctx *ki_ctx; /* kioctx read back by aio_complete() and kiocb_set_cancel_fn() */ | ||
171 | kiocb_cancel_fn *ki_cancel; /* cancel callback, or KIOCB_CANCELLED once cancelled/completed */ | ||
172 | |||
173 | struct iocb __user *ki_user_iocb; /* userspace iocb pointer: reported as io_event.obj, matched by lookup_kiocb() */ | ||
174 | __u64 ki_user_data; /* copy of the user's aio_data, reported as io_event.data */ | ||
175 | |||
176 | struct list_head ki_list; /* entry on ctx->active_reqs; the aio core | ||
177 | * walks that list for cancellation */ | ||
178 | |||
179 | /* | ||
180 | * If the aio_resfd field of the userspace iocb is not zero, | ||
181 | * this is the underlying eventfd context to deliver events to. | ||
182 | * kiocb_free() drops it with eventfd_ctx_put() when it is non-NULL. | ||
183 | */ | ||
183 | struct eventfd_ctx *ki_eventfd; | ||
184 | }; | ||
185 | |||
154 | /*------ sysctl variables----*/ | 186 | /*------ sysctl variables----*/ |
155 | static DEFINE_SPINLOCK(aio_nr_lock); | 187 | static DEFINE_SPINLOCK(aio_nr_lock); |
156 | unsigned long aio_nr; /* current system wide number of aio requests */ | 188 | unsigned long aio_nr; /* current system wide number of aio requests */ |
@@ -220,7 +252,7 @@ static int __init aio_setup(void) | |||
220 | if (IS_ERR(aio_mnt)) | 252 | if (IS_ERR(aio_mnt)) |
221 | panic("Failed to create aio fs mount."); | 253 | panic("Failed to create aio fs mount."); |
222 | 254 | ||
223 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 255 | kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
224 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 256 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
225 | 257 | ||
226 | pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); | 258 | pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); |
@@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
480 | #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) | 512 | #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) |
481 | #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) | 513 | #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) |
482 | 514 | ||
483 | void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) | 515 | void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) |
484 | { | 516 | { |
517 | struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); | ||
485 | struct kioctx *ctx = req->ki_ctx; | 518 | struct kioctx *ctx = req->ki_ctx; |
486 | unsigned long flags; | 519 | unsigned long flags; |
487 | 520 | ||
@@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) | |||
496 | } | 529 | } |
497 | EXPORT_SYMBOL(kiocb_set_cancel_fn); | 530 | EXPORT_SYMBOL(kiocb_set_cancel_fn); |
498 | 531 | ||
499 | static int kiocb_cancel(struct kiocb *kiocb) | 532 | static int kiocb_cancel(struct aio_kiocb *kiocb) |
500 | { | 533 | { |
501 | kiocb_cancel_fn *old, *cancel; | 534 | kiocb_cancel_fn *old, *cancel; |
502 | 535 | ||
@@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb) | |||
514 | cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); | 547 | cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); |
515 | } while (cancel != old); | 548 | } while (cancel != old); |
516 | 549 | ||
517 | return cancel(kiocb); | 550 | return cancel(&kiocb->common); |
518 | } | 551 | } |
519 | 552 | ||
520 | static void free_ioctx(struct work_struct *work) | 553 | static void free_ioctx(struct work_struct *work) |
@@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |||
550 | static void free_ioctx_users(struct percpu_ref *ref) | 583 | static void free_ioctx_users(struct percpu_ref *ref) |
551 | { | 584 | { |
552 | struct kioctx *ctx = container_of(ref, struct kioctx, users); | 585 | struct kioctx *ctx = container_of(ref, struct kioctx, users); |
553 | struct kiocb *req; | 586 | struct aio_kiocb *req; |
554 | 587 | ||
555 | spin_lock_irq(&ctx->ctx_lock); | 588 | spin_lock_irq(&ctx->ctx_lock); |
556 | 589 | ||
557 | while (!list_empty(&ctx->active_reqs)) { | 590 | while (!list_empty(&ctx->active_reqs)) { |
558 | req = list_first_entry(&ctx->active_reqs, | 591 | req = list_first_entry(&ctx->active_reqs, |
559 | struct kiocb, ki_list); | 592 | struct aio_kiocb, ki_list); |
560 | 593 | ||
561 | list_del_init(&req->ki_list); | 594 | list_del_init(&req->ki_list); |
562 | kiocb_cancel(req); | 595 | kiocb_cancel(req); |
@@ -932,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx) | |||
932 | * Allocate a slot for an aio request. | 965 | * Allocate a slot for an aio request. |
933 | * Returns NULL if no requests are free. | 966 | * Returns NULL if no requests are free. |
934 | */ | 967 | */ |
935 | static inline struct kiocb *aio_get_req(struct kioctx *ctx) | 968 | static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) |
936 | { | 969 | { |
937 | struct kiocb *req; | 970 | struct aio_kiocb *req; |
938 | 971 | ||
939 | if (!get_reqs_available(ctx)) { | 972 | if (!get_reqs_available(ctx)) { |
940 | user_refill_reqs_available(ctx); | 973 | user_refill_reqs_available(ctx); |
@@ -955,10 +988,10 @@ out_put: | |||
955 | return NULL; | 988 | return NULL; |
956 | } | 989 | } |
957 | 990 | ||
958 | static void kiocb_free(struct kiocb *req) | 991 | static void kiocb_free(struct aio_kiocb *req) |
959 | { | 992 | { |
960 | if (req->ki_filp) | 993 | if (req->common.ki_filp) |
961 | fput(req->ki_filp); | 994 | fput(req->common.ki_filp); |
962 | if (req->ki_eventfd != NULL) | 995 | if (req->ki_eventfd != NULL) |
963 | eventfd_ctx_put(req->ki_eventfd); | 996 | eventfd_ctx_put(req->ki_eventfd); |
964 | kmem_cache_free(kiocb_cachep, req); | 997 | kmem_cache_free(kiocb_cachep, req); |
@@ -994,8 +1027,9 @@ out: | |||
994 | /* aio_complete | 1027 | /* aio_complete |
995 | * Called when the io request on the given iocb is complete. | 1028 | * Called when the io request on the given iocb is complete. |
996 | */ | 1029 | */ |
997 | void aio_complete(struct kiocb *iocb, long res, long res2) | 1030 | static void aio_complete(struct kiocb *kiocb, long res, long res2) |
998 | { | 1031 | { |
1032 | struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); | ||
999 | struct kioctx *ctx = iocb->ki_ctx; | 1033 | struct kioctx *ctx = iocb->ki_ctx; |
1000 | struct aio_ring *ring; | 1034 | struct aio_ring *ring; |
1001 | struct io_event *ev_page, *event; | 1035 | struct io_event *ev_page, *event; |
@@ -1009,7 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
1009 | * ref, no other paths have a way to get another ref | 1043 | * ref, no other paths have a way to get another ref |
1010 | * - the sync task helpfully left a reference to itself in the iocb | 1044 | * - the sync task helpfully left a reference to itself in the iocb |
1011 | */ | 1045 | */ |
1012 | BUG_ON(is_sync_kiocb(iocb)); | 1046 | BUG_ON(is_sync_kiocb(kiocb)); |
1013 | 1047 | ||
1014 | if (iocb->ki_list.next) { | 1048 | if (iocb->ki_list.next) { |
1015 | unsigned long flags; | 1049 | unsigned long flags; |
@@ -1035,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
1035 | ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); | 1069 | ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); |
1036 | event = ev_page + pos % AIO_EVENTS_PER_PAGE; | 1070 | event = ev_page + pos % AIO_EVENTS_PER_PAGE; |
1037 | 1071 | ||
1038 | event->obj = (u64)(unsigned long)iocb->ki_obj.user; | 1072 | event->obj = (u64)(unsigned long)iocb->ki_user_iocb; |
1039 | event->data = iocb->ki_user_data; | 1073 | event->data = iocb->ki_user_data; |
1040 | event->res = res; | 1074 | event->res = res; |
1041 | event->res2 = res2; | 1075 | event->res2 = res2; |
@@ -1044,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
1044 | flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); | 1078 | flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); |
1045 | 1079 | ||
1046 | pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", | 1080 | pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", |
1047 | ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, | 1081 | ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, |
1048 | res, res2); | 1082 | res, res2); |
1049 | 1083 | ||
1050 | /* after flagging the request as done, we | 1084 | /* after flagging the request as done, we |
@@ -1091,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
1091 | 1125 | ||
1092 | percpu_ref_put(&ctx->reqs); | 1126 | percpu_ref_put(&ctx->reqs); |
1093 | } | 1127 | } |
1094 | EXPORT_SYMBOL(aio_complete); | ||
1095 | 1128 | ||
1096 | /* aio_read_events_ring | 1129 | /* aio_read_events_ring |
1097 | * Pull an event off of the ioctx's event ring. Returns the number of | 1130 | * Pull an event off of the ioctx's event ring. Returns the number of |
@@ -1480,7 +1513,7 @@ rw_common: | |||
1480 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | 1513 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, |
1481 | struct iocb *iocb, bool compat) | 1514 | struct iocb *iocb, bool compat) |
1482 | { | 1515 | { |
1483 | struct kiocb *req; | 1516 | struct aio_kiocb *req; |
1484 | ssize_t ret; | 1517 | ssize_t ret; |
1485 | 1518 | ||
1486 | /* enforce forwards compatibility on users */ | 1519 | /* enforce forwards compatibility on users */ |
@@ -1503,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1503 | if (unlikely(!req)) | 1536 | if (unlikely(!req)) |
1504 | return -EAGAIN; | 1537 | return -EAGAIN; |
1505 | 1538 | ||
1506 | req->ki_filp = fget(iocb->aio_fildes); | 1539 | req->common.ki_filp = fget(iocb->aio_fildes); |
1507 | if (unlikely(!req->ki_filp)) { | 1540 | if (unlikely(!req->common.ki_filp)) { |
1508 | ret = -EBADF; | 1541 | ret = -EBADF; |
1509 | goto out_put_req; | 1542 | goto out_put_req; |
1510 | } | 1543 | } |
1544 | req->common.ki_pos = iocb->aio_offset; | ||
1545 | req->common.ki_complete = aio_complete; | ||
1546 | req->common.ki_flags = 0; | ||
1511 | 1547 | ||
1512 | if (iocb->aio_flags & IOCB_FLAG_RESFD) { | 1548 | if (iocb->aio_flags & IOCB_FLAG_RESFD) { |
1513 | /* | 1549 | /* |
@@ -1522,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1522 | req->ki_eventfd = NULL; | 1558 | req->ki_eventfd = NULL; |
1523 | goto out_put_req; | 1559 | goto out_put_req; |
1524 | } | 1560 | } |
1561 | |||
1562 | req->common.ki_flags |= IOCB_EVENTFD; | ||
1525 | } | 1563 | } |
1526 | 1564 | ||
1527 | ret = put_user(KIOCB_KEY, &user_iocb->aio_key); | 1565 | ret = put_user(KIOCB_KEY, &user_iocb->aio_key); |
@@ -1530,11 +1568,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1530 | goto out_put_req; | 1568 | goto out_put_req; |
1531 | } | 1569 | } |
1532 | 1570 | ||
1533 | req->ki_obj.user = user_iocb; | 1571 | req->ki_user_iocb = user_iocb; |
1534 | req->ki_user_data = iocb->aio_data; | 1572 | req->ki_user_data = iocb->aio_data; |
1535 | req->ki_pos = iocb->aio_offset; | ||
1536 | 1573 | ||
1537 | ret = aio_run_iocb(req, iocb->aio_lio_opcode, | 1574 | ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, |
1538 | (char __user *)(unsigned long)iocb->aio_buf, | 1575 | (char __user *)(unsigned long)iocb->aio_buf, |
1539 | iocb->aio_nbytes, | 1576 | iocb->aio_nbytes, |
1540 | compat); | 1577 | compat); |
@@ -1623,10 +1660,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | |||
1623 | /* lookup_kiocb | 1660 | /* lookup_kiocb |
1624 | * Finds a given iocb for cancellation. | 1661 | * Finds a given iocb for cancellation. |
1625 | */ | 1662 | */ |
1626 | static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, | 1663 | static struct aio_kiocb * |
1627 | u32 key) | 1664 | lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) |
1628 | { | 1665 | { |
1629 | struct list_head *pos; | 1666 | struct aio_kiocb *kiocb; |
1630 | 1667 | ||
1631 | assert_spin_locked(&ctx->ctx_lock); | 1668 | assert_spin_locked(&ctx->ctx_lock); |
1632 | 1669 | ||
@@ -1634,9 +1671,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, | |||
1634 | return NULL; | 1671 | return NULL; |
1635 | 1672 | ||
1636 | /* TODO: use a hash or array, this sucks. */ | 1673 | /* TODO: use a hash or array, this sucks. */ |
1637 | list_for_each(pos, &ctx->active_reqs) { | 1674 | list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { |
1638 | struct kiocb *kiocb = list_kiocb(pos); | 1675 | if (kiocb->ki_user_iocb == iocb) |
1639 | if (kiocb->ki_obj.user == iocb) | ||
1640 | return kiocb; | 1676 | return kiocb; |
1641 | } | 1677 | } |
1642 | return NULL; | 1678 | return NULL; |
@@ -1656,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, | |||
1656 | struct io_event __user *, result) | 1692 | struct io_event __user *, result) |
1657 | { | 1693 | { |
1658 | struct kioctx *ctx; | 1694 | struct kioctx *ctx; |
1659 | struct kiocb *kiocb; | 1695 | struct aio_kiocb *kiocb; |
1660 | u32 key; | 1696 | u32 key; |
1661 | int ret; | 1697 | int ret; |
1662 | 1698 | ||