author     Kent Overstreet <koverstreet@google.com>    2013-04-25 20:58:39 -0400
committer  Benjamin LaHaise <bcrl@kvack.org>           2013-07-30 11:53:11 -0400
commit     e1bdd5f27a5b14e24a658d5511bebceb67679d83
tree       3c18d12918a5ebe02bc38f63dd29031ea40673e0
parent     34e83fc618085e00dc9803286c581f51966673bd
aio: percpu reqs_available
See the previous patch ("aio: reqs_active -> reqs_available") for why we
want to do this: it basically implements a per-cpu allocator for
reqs_available that doesn't actually allocate anything.

Note that we need to increase the size of the ringbuffer we allocate,
since a single thread won't necessarily be able to use all the
reqs_available slots; some (up to about half) may be sitting on other
CPUs' per-cpu lists, unavailable to the current thread.

We size the ringbuffer based on the nr_events userspace passed to
io_setup(), so this is a slight behaviour change, but nr_events wasn't
being used as a hard limit before; it was already being rounded up to the
next page, so the actual semantics don't change.
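
As a rough illustration of the scheme above, here is a minimal userspace sketch
of the same idea: a global atomic pool of slots plus a per-thread cache that is
refilled and drained one batch at a time. The names (get_req(), put_req(),
REQ_BATCH) are made up for the example; the kernel code in the diff below works
on per-CPU structures under preempt_disable() instead of thread-local storage.

	#include <stdatomic.h>
	#include <stdbool.h>

	#define REQ_BATCH 16

	/* Global pool of free slots (analogue of ctx->reqs_available). */
	static atomic_int reqs_available;

	/* Per-thread cache (analogue of the kioctx_cpu counter). */
	static _Thread_local unsigned cached_reqs;

	static bool get_req(void)
	{
		if (!cached_reqs) {
			int avail = atomic_load(&reqs_available);

			/* Refill the cache by grabbing a whole batch at once. */
			do {
				if (avail < REQ_BATCH)
					return false;
			} while (!atomic_compare_exchange_weak(&reqs_available,
						&avail, avail - REQ_BATCH));
			cached_reqs = REQ_BATCH;
		}

		cached_reqs--;
		return true;
	}

	static void put_req(void)
	{
		/* Flush a batch back once the cache grows past two batches. */
		if (++cached_reqs >= REQ_BATCH * 2) {
			cached_reqs -= REQ_BATCH;
			atomic_fetch_add(&reqs_available, REQ_BATCH);
		}
	}

	int main(void)
	{
		atomic_store(&reqs_available, 128);	/* a 128-slot "ring" */

		if (get_req()) {
			/* ... submit and complete a request ... */
			put_req();
		}
		return 0;
	}

The payoff is the same as in the kernel version: the common path only touches
the local (per-thread here, per-CPU there) counter, and the shared atomic is
hit only once per batch of allocations or frees.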
Signed-off-by: Kent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
-rw-r--r--	fs/aio.c	106
1 file changed, 99 insertions, 7 deletions
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/mmu_context.h>
+#include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/aio.h>
@@ -64,6 +65,10 @@ struct aio_ring {
 
 #define AIO_RING_PAGES	8
 
+struct kioctx_cpu {
+	unsigned		reqs_available;
+};
+
 struct kioctx {
 	atomic_t		users;
 	atomic_t		dead;
@@ -72,6 +77,13 @@ struct kioctx {
 	unsigned long		user_id;
 	struct hlist_node	list;
 
+	struct __percpu kioctx_cpu *cpu;
+
+	/*
+	 * For percpu reqs_available, number of slots we move to/from global
+	 * counter at a time:
+	 */
+	unsigned		req_batch;
 	/*
 	 * This is what userspace passed to io_setup(), it's not used for
 	 * anything but counting against the global max_reqs quota.
@@ -99,6 +111,8 @@ struct kioctx {
 	 * so we avoid overflowing it: it's decremented (if positive)
 	 * when allocating a kiocb and incremented when the resulting
 	 * io_event is pulled off the ringbuffer.
+	 *
+	 * We batch accesses to it with a percpu version.
 	 */
 	atomic_t		reqs_available;
 } ____cacheline_aligned_in_smp;
@@ -379,6 +393,8 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
 static void free_ioctx_rcu(struct rcu_head *head)
 {
 	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+
+	free_percpu(ctx->cpu);
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
@@ -392,7 +408,7 @@ static void free_ioctx(struct kioctx *ctx)
 	struct aio_ring *ring;
 	struct io_event res;
 	struct kiocb *req;
-	unsigned head, avail;
+	unsigned cpu, head, avail;
 
 	spin_lock_irq(&ctx->ctx_lock);
 
@@ -406,6 +422,13 @@ static void free_ioctx(struct kioctx *ctx)
 
 	spin_unlock_irq(&ctx->ctx_lock);
 
+	for_each_possible_cpu(cpu) {
+		struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
+
+		atomic_add(kcpu->reqs_available, &ctx->reqs_available);
+		kcpu->reqs_available = 0;
+	}
+
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	head = ring->head;
 	kunmap_atomic(ring);
@@ -454,6 +477,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	struct kioctx *ctx;
 	int err = -ENOMEM;
 
+	/*
+	 * We keep track of the number of available ringbuffer slots, to prevent
+	 * overflow (reqs_available), and we also use percpu counters for this.
+	 *
+	 * So since up to half the slots might be on other cpu's percpu counters
+	 * and unavailable, double nr_events so userspace sees what they
+	 * expected: additionally, we move req_batch slots to/from percpu
+	 * counters at a time, so make sure that isn't 0:
+	 */
+	nr_events = max(nr_events, num_possible_cpus() * 4);
+	nr_events *= 2;
+
 	/* Prevent overflows */
 	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
 	    (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
@@ -479,10 +514,16 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 
-	if (aio_setup_ring(ctx) < 0)
+	ctx->cpu = alloc_percpu(struct kioctx_cpu);
+	if (!ctx->cpu)
 		goto out_freectx;
 
+	if (aio_setup_ring(ctx) < 0)
+		goto out_freepcpu;
+
 	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
+	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
+	BUG_ON(!ctx->req_batch);
 
 	/* limit the number of system wide aios */
 	spin_lock(&aio_nr_lock);
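
To make the sizing concrete, a hypothetical example: on a machine with 8
possible CPUs, io_setup(64, ...) gives nr_events = max(64, 8 * 4) = 64, doubled
to 128. Ignoring any page-granular rounding aio_setup_ring() may apply to
ctx->nr_events, req_batch then works out to (128 - 1) / (8 * 4) = 3, so each
CPU moves three slots at a time to and from the global reqs_available counter;
the max() against num_possible_cpus() * 4 is what keeps req_batch from ever
being 0 (hence the BUG_ON).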
@@ -506,6 +547,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 out_cleanup:
 	err = -EAGAIN;
 	aio_free_ring(ctx);
+out_freepcpu:
+	free_percpu(ctx->cpu);
 out_freectx:
 	if (ctx->aio_ring_file)
 		fput(ctx->aio_ring_file);
@@ -610,6 +653,52 @@ void exit_aio(struct mm_struct *mm)
 	}
 }
 
+static void put_reqs_available(struct kioctx *ctx, unsigned nr)
+{
+	struct kioctx_cpu *kcpu;
+
+	preempt_disable();
+	kcpu = this_cpu_ptr(ctx->cpu);
+
+	kcpu->reqs_available += nr;
+	while (kcpu->reqs_available >= ctx->req_batch * 2) {
+		kcpu->reqs_available -= ctx->req_batch;
+		atomic_add(ctx->req_batch, &ctx->reqs_available);
+	}
+
+	preempt_enable();
+}
+
+static bool get_reqs_available(struct kioctx *ctx)
+{
+	struct kioctx_cpu *kcpu;
+	bool ret = false;
+
+	preempt_disable();
+	kcpu = this_cpu_ptr(ctx->cpu);
+
+	if (!kcpu->reqs_available) {
+		int old, avail = atomic_read(&ctx->reqs_available);
+
+		do {
+			if (avail < ctx->req_batch)
+				goto out;
+
+			old = avail;
+			avail = atomic_cmpxchg(&ctx->reqs_available,
+					       avail, avail - ctx->req_batch);
+		} while (avail != old);
+
+		kcpu->reqs_available += ctx->req_batch;
+	}
+
+	ret = true;
+	kcpu->reqs_available--;
+out:
+	preempt_enable();
+	return ret;
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.  Increments the ki_users count
  * of the kioctx so that the kioctx stays around until all requests are
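
A rough bound behind the "up to about half" figure in the commit message:
put_reqs_available() only flushes a batch back once a CPU's cache reaches
req_batch * 2, so each per-CPU cache can retain up to 2 * req_batch - 1 slots.
Across num_possible_cpus() CPUs that is roughly num_possible_cpus() * 2 *
req_batch, which by the req_batch formula in ioctx_alloc() is about half of
ctx->nr_events; that worst case is what the doubling of nr_events compensates
for.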
@@ -624,7 +713,7 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (atomic_dec_if_positive(&ctx->reqs_available) <= 0)
+	if (!get_reqs_available(ctx))
 		return NULL;
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
@@ -633,10 +722,9 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 
 	atomic_set(&req->ki_users, 2);
 	req->ki_ctx = ctx;
-
 	return req;
 out_put:
-	atomic_inc(&ctx->reqs_available);
+	put_reqs_available(ctx, 1);
 	return NULL;
 }
 
@@ -725,6 +813,10 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	 */
 	if (unlikely(xchg(&iocb->ki_cancel,
 			  KIOCB_CANCELLED) == KIOCB_CANCELLED)) {
+		/*
+		 * Can't use the percpu reqs_available here - could race with
+		 * free_ioctx()
+		 */
 		atomic_inc(&ctx->reqs_available);
 		/* Still need the wake_up in case free_ioctx is waiting */
 		goto put_rq;
@@ -863,7 +955,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
 	pr_debug("%li h%u t%u\n", ret, head, ctx->tail);
 
-	atomic_add(ret, &ctx->reqs_available);
+	put_reqs_available(ctx, ret);
 out:
 	mutex_unlock(&ctx->ring_lock);
 
@@ -1247,7 +1339,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
 out_put_req:
-	atomic_inc(&ctx->reqs_available);
+	put_reqs_available(ctx, 1);
 	aio_put_req(req);	/* drop extra ref to req */
 	aio_put_req(req);	/* drop i/o ref to req */
 	return ret;