author    Jens Axboe <axboe@fb.com>    2015-04-15 13:17:23 -0400
committer Jens Axboe <axboe@fb.com>    2015-04-15 13:17:23 -0400
commit    dc48e56d761610da4ea1088d1bea0a030b8e3e43
tree      1e72267221c05aff3c4455910df7a787b4beff78
parent    2963e3f7e8e3465895897a175560210120b932ac
aio: fix serial draining in exit_aio()
exit_aio() currently serializes killing io contexts. Killing each context ends up having to do percpu_ref_kill(), which in turn has to wait for an RCU grace period. This can take a long time, depending on the number of contexts. And there's no point in doing them serially when we could be waiting for all of them in one fell swoop.

This patch makes my fio thread offload test case exit in 0.2s instead of almost 6s.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
 fs/aio.c | 45 ++++++++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 15 deletions(-)
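The fix batches all the waits behind a single counter-and-completion pair: arm one completion and a count of outstanding contexts, let each draining context decrement the count, and have only the last one signal. The following is a rough, self-contained userspace sketch of that pattern, not kernel code: it uses pthreads plus C11 atomics, a mutex/condvar pair stands in for the kernel's struct completion, and the helper names (rq_wait_complete, drain_ctx) are invented for the example.

/*
 * Minimal userspace sketch of the batched drain pattern this patch
 * introduces: one shared counter plus one completion instead of a
 * serial wait per context. Illustration only.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct ctx_rq_wait {
	pthread_mutex_t lock;
	pthread_cond_t  comp;   /* stands in for struct completion */
	atomic_int      count;  /* contexts still draining */
	int             done;
};

/* The last context to finish signals the single waiter, as
 * atomic_dec_and_test() + complete() do in free_ioctx_reqs(). */
static void rq_wait_complete(struct ctx_rq_wait *wait)
{
	if (atomic_fetch_sub(&wait->count, 1) == 1) {
		pthread_mutex_lock(&wait->lock);
		wait->done = 1;
		pthread_cond_signal(&wait->comp);
		pthread_mutex_unlock(&wait->lock);
	}
}

static void *drain_ctx(void *arg)
{
	/* ...asynchronous teardown work would happen here... */
	rq_wait_complete(arg);
	return NULL;
}

int main(void)
{
	enum { NR_CTX = 8 };
	struct ctx_rq_wait wait = { .done = 0 };
	pthread_t tid[NR_CTX];
	int i;

	pthread_mutex_init(&wait.lock, NULL);
	pthread_cond_init(&wait.comp, NULL);
	atomic_store(&wait.count, NR_CTX);

	/* Start every teardown first (the patch's kill_ioctx() loop)... */
	for (i = 0; i < NR_CTX; i++)
		pthread_create(&tid[i], NULL, drain_ctx, &wait);

	/* ...then pay for the wait once, not NR_CTX times. */
	pthread_mutex_lock(&wait.lock);
	while (!wait.done)
		pthread_cond_wait(&wait.comp, &wait.lock);
	pthread_mutex_unlock(&wait.lock);

	for (i = 0; i < NR_CTX; i++)
		pthread_join(tid[i], NULL);
	puts("all contexts drained");
	return 0;
}

Build with cc -pthread. Each thread decrements the shared count and only the last one signals, so the waiter blocks exactly once no matter how many contexts are in flight; this is the same reason exit_aio() below now pays for a single RCU grace-period wait instead of one per context.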
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1854c1..cabb5edd9bc1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
 	unsigned		reqs_available;
 };
 
+struct ctx_rq_wait {
+	struct completion comp;
+	atomic_t count;
+};
+
 struct kioctx {
 	struct percpu_ref	users;
 	atomic_t		dead;
@@ -115,7 +120,7 @@ struct kioctx {
 	/*
 	 * signals when all in-flight requests are done
 	 */
-	struct completion *requests_done;
+	struct ctx_rq_wait	*rq_wait;
 
 	struct {
 		/*
@@ -535,8 +540,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
 
 	/* At this point we know that there are no any in-flight requests */
-	if (ctx->requests_done)
-		complete(ctx->requests_done);
+	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
+		complete(&ctx->rq_wait->comp);
 
 	INIT_WORK(&ctx->free_work, free_ioctx);
 	schedule_work(&ctx->free_work);
@@ -744,7 +749,7 @@ err:
  *	the rapid destruction of the kioctx.
  */
 static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
-		struct completion *requests_done)
+		      struct ctx_rq_wait *wait)
 {
 	struct kioctx_table *table;
 
@@ -773,7 +778,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	if (ctx->mmap_size)
 		vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
-	ctx->requests_done = requests_done;
+	ctx->rq_wait = wait;
 	percpu_ref_kill(&ctx->users);
 	return 0;
 }
@@ -805,18 +810,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
 void exit_aio(struct mm_struct *mm)
 {
 	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
-	int i;
+	struct ctx_rq_wait wait;
+	int i, skipped;
 
 	if (!table)
 		return;
 
+	atomic_set(&wait.count, table->nr);
+	init_completion(&wait.comp);
+
+	skipped = 0;
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
-		if (!ctx)
+		if (!ctx) {
+			skipped++;
 			continue;
+		}
+
 		/*
 		 * We don't need to bother with munmap() here - exit_mmap(mm)
 		 * is coming and it'll unmap everything. And we simply can't,
@@ -825,10 +836,12 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, &requests_done);
+		kill_ioctx(mm, ctx, &wait);
+	}
 
+	if (!atomic_sub_and_test(skipped, &wait.count)) {
 		/* Wait until all IO for the context are done. */
-		wait_for_completion(&requests_done);
+		wait_for_completion(&wait.comp);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -1313,15 +1326,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		struct ctx_rq_wait wait;
 		int ret;
 
+		init_completion(&wait.comp);
+		atomic_set(&wait.count, 1);
+
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		ret = kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &wait);
 		percpu_ref_put(&ioctx->users);
 
 		/* Wait until all IO for the context are done. Otherwise kernel
@@ -1329,7 +1344,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 		 * is destroyed.
 		 */
 		if (!ret)
-			wait_for_completion(&requests_done);
+			wait_for_completion(&wait.comp);
 
 		return ret;
 	}
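Two details of the accounting are worth noting. exit_aio() seeds wait.count with table->nr before it knows how many table slots actually hold a context, so empty slots are tallied in skipped and subtracted in one go afterwards; atomic_sub_and_test() also catches the degenerate case where every slot was empty and the count drops straight to zero, in which case the completion is never waited on. In io_destroy() the count is simply initialized to 1, so the shared ctx_rq_wait degenerates to the per-context on-stack completion it replaces.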