| author | Kent Overstreet <koverstreet@google.com> | 2013-05-07 19:18:51 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-07 21:38:29 -0400 |
| commit | 3e845ce01a391d7c5d59ff2f28db5381bf02fa27 (patch) | |
| tree | 62213380f984fec969e70b1c1328feab15ec681f | |
| parent | 0460fef2a9215680f7f85415b57731b7e0fdf673 (diff) | |
aio: change reqs_active to include unreaped completions
The aio code tries really hard to avoid having to deal with the
completion ringbuffer overflowing. To do that, it has to keep track of
the number of outstanding kiocbs, and the number of completions
currently in the ringbuffer - and it's got to check that every time we
allocate a kiocb. Ouch.
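
For reference, the check being removed has to combine ring geometry with the in-flight count on every allocation. A minimal user-space sketch of that calculation (not kernel code; the ring numbers below are made up for illustration) looks like this:

```c
/* Simplified model of the old accounting: to admit a new request, the
 * allocator had to map the ring header and combine the ring's free space
 * with the count of outstanding kiocbs. */
#include <stdio.h>

struct ring { unsigned nr, head, tail; };   /* completion ring geometry */

/* Mirrors the removed aio_ring_avail(): slots still free in the ring,
 * keeping one slot unused so head == tail always means "empty". */
static unsigned ring_avail(const struct ring *r)
{
        return (r->head + r->nr - 1 - r->tail) % r->nr;
}

int main(void)
{
        struct ring r = { .nr = 128, .head = 10, .tail = 40 };
        int reqs_active = 25;   /* kiocbs in flight (old meaning) */

        /* Old admission check: ring space minus requests that may still
         * complete into it.  Needs the ring header on every allocation. */
        long can_submit = (long)ring_avail(&r) - reqs_active;

        printf("ring space %u, in flight %d, may submit %ld more\n",
               ring_avail(&r), reqs_active, can_submit);
        return 0;
}
```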
But - we can improve this quite a bit if we just change reqs_active to
mean "number of outstanding requests and unreaped completions" - that
means kiocb allocation doesn't have to look at the ringbuffer, which is
a fairly significant win.
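
With the new meaning, admission control reduces to comparing a single counter against the ring size. A minimal user-space sketch of the idea (not kernel code; RING_NR, submit() and reap() are illustrative stand-ins) follows:

```c
/* Simplified model of the new accounting: reqs_active counts in-flight
 * requests *and* completions still sitting in the ring, so admission
 * control never has to touch the ring header. */
#include <stdio.h>

#define RING_NR 128          /* ring->nr: usable slots are RING_NR - 1 */

static int reqs_active;      /* outstanding requests + unreaped completions */

/* kiocb_batch_refill()-style check: one counter, no kmap of the ring. */
static long avail(void)
{
        return RING_NR - reqs_active - 1;
}

static void submit(void) { reqs_active++; }    /* request allocated */
static void reap(int n)  { reqs_active -= n; } /* events consumed   */

int main(void)
{
        for (int i = 0; i < 30; i++)
                submit();
        printf("after 30 submits: avail = %ld\n", avail());        /* 97 */

        /* Completion moves a request from "in flight" to "unreaped event";
         * the counter only drops once the events are actually reaped. */
        reap(30);
        printf("after reaping 30 events: avail = %ld\n", avail()); /* 127 */
        return 0;
}
```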
Signed-off-by: Kent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  fs/aio.c | 47
1 file changed, 32 insertions, 15 deletions
```diff
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -71,12 +71,6 @@ struct aio_ring_info {
         struct page             *internal_pages[AIO_RING_PAGES];
 };
 
-static inline unsigned aio_ring_avail(struct aio_ring_info *info,
-                                        struct aio_ring *ring)
-{
-        return (ring->head + info->nr - 1 - ring->tail) % info->nr;
-}
-
 struct kioctx {
         atomic_t                users;
         atomic_t                dead;
@@ -92,7 +86,13 @@ struct kioctx {
         atomic_t                reqs_active;
         struct list_head        active_reqs;    /* used for cancellation */
 
-        /* sys_io_setup currently limits this to an unsigned int */
+        /*
+         * This is what userspace passed to io_setup(), it's not used for
+         * anything but counting against the global max_reqs quota.
+         *
+         * The real limit is ring->nr - 1, which will be larger (see
+         * aio_setup_ring())
+         */
         unsigned                max_reqs;
 
         struct aio_ring_info    ring_info;
@@ -284,8 +284,11 @@ static void free_ioctx_rcu(struct rcu_head *head)
  */
 static void free_ioctx(struct kioctx *ctx)
 {
+        struct aio_ring_info *info = &ctx->ring_info;
+        struct aio_ring *ring;
         struct io_event res;
         struct kiocb *req;
+        unsigned head, avail;
 
         spin_lock_irq(&ctx->ctx_lock);
 
@@ -299,7 +302,21 @@ static void free_ioctx(struct kioctx *ctx)
 
         spin_unlock_irq(&ctx->ctx_lock);
 
-        wait_event(ctx->wait, !atomic_read(&ctx->reqs_active));
+        ring = kmap_atomic(info->ring_pages[0]);
+        head = ring->head;
+        kunmap_atomic(ring);
+
+        while (atomic_read(&ctx->reqs_active) > 0) {
+                wait_event(ctx->wait, head != info->tail);
+
+                avail = (head <= info->tail ? info->tail : info->nr) - head;
+
+                atomic_sub(avail, &ctx->reqs_active);
+                head += avail;
+                head %= info->nr;
+        }
+
+        WARN_ON(atomic_read(&ctx->reqs_active) < 0);
 
         aio_free_ring(ctx);
 
@@ -548,7 +565,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
         unsigned short allocated, to_alloc;
         long avail;
         struct kiocb *req, *n;
-        struct aio_ring *ring;
 
         to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
         for (allocated = 0; allocated < to_alloc; allocated++) {
@@ -563,10 +579,8 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
                 goto out;
 
         spin_lock_irq(&ctx->ctx_lock);
-        ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
 
-        avail = aio_ring_avail(&ctx->ring_info, ring) -
-                                atomic_read(&ctx->reqs_active);
+        avail = ctx->ring_info.nr - atomic_read(&ctx->reqs_active) - 1;
         BUG_ON(avail < 0);
         if (avail < allocated) {
                 /* Trim back the number of requests. */
@@ -581,7 +595,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
         batch->count -= allocated;
         atomic_add(allocated, &ctx->reqs_active);
 
-        kunmap_atomic(ring);
         spin_unlock_irq(&ctx->ctx_lock);
 
 out:
@@ -688,8 +701,11 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
          * when the event got cancelled.
          */
         if (unlikely(xchg(&iocb->ki_cancel,
-                       KIOCB_CANCELLED) == KIOCB_CANCELLED))
+                       KIOCB_CANCELLED) == KIOCB_CANCELLED)) {
+                atomic_dec(&ctx->reqs_active);
+                /* Still need the wake_up in case free_ioctx is waiting */
                 goto put_rq;
+        }
 
         /*
          * Add a completion event to the ring buffer. Must be done holding
@@ -746,7 +762,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 put_rq:
         /* everything turned out well, dispose of the aiocb. */
         aio_put_req(iocb);
-        atomic_dec(&ctx->reqs_active);
 
         /*
          * We have to order our ring_info tail store above and test
@@ -825,6 +840,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
         flush_dcache_page(info->ring_pages[0]);
 
         pr_debug("%li h%u t%u\n", ret, head, info->tail);
+
+        atomic_sub(ret, &ctx->reqs_active);
 out:
         mutex_unlock(&info->ring_lock);
 
```