path: root/fs/aio.c
author		Linus Torvalds <torvalds@linux-foundation.org>	2015-04-16 21:49:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-16 21:49:16 -0400
commit		d82312c80860b8b83cd4473ac6eafd244e712061 (patch)
tree		028b2e843e9d59d35aeb8924582864f18aa4ca36 /fs/aio.c
parent		7d69cff26ceadce8638cb65191285932a3de3d4c (diff)
parent		889fa31f00b218a2cef96c32a6b3f57e6d3bf918 (diff)
Merge branch 'for-4.1/core' of git://git.kernel.dk/linux-block
Pull block layer core bits from Jens Axboe:
 "This is the core pull request for 4.1. Not a lot of stuff in here for
  this round, mostly little fixes or optimizations. This pull request
  contains:

   - An optimization that speeds up queue runs on blk-mq, especially for
     the case where there's a large difference between nr_cpu_ids and
     the actual mapped software queues on a hardware queue. From Chong
     Yuan.

   - Honor node local allocations for requests on legacy devices. From
     David Rientjes.

   - Cleanup of blk_mq_rq_to_pdu() from me.

   - exit_aio() fixup from me, greatly speeding up exiting multiple IO
     contexts off exit_group(). For my particular test case, fio exit
     took ~6 seconds. A typical case of both exposing RCU grace periods
     to user space, and serializing exit of them.

   - Make blk_mq_queue_enter() honor the gfp mask passed in, so we only
     wait if __GFP_WAIT is set. From Keith Busch.

   - blk-mq exports and two added helpers from Mike Snitzer, which will
     be used by the dm-mq code.

   - Cleanups of blk-mq queue init from Wei Fang and Xiaoguang Wang"

* 'for-4.1/core' of git://git.kernel.dk/linux-block:
  blk-mq: reduce unnecessary software queue looping
  aio: fix serial draining in exit_aio()
  blk-mq: cleanup blk_mq_rq_to_pdu()
  blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()
  block: remove redundant check about 'set->nr_hw_queues' in blk_mq_alloc_tag_set()
  block: allocate request memory local to request queue
  blk-mq: don't wait in blk_mq_queue_enter() if __GFP_WAIT isn't set
  blk-mq: export blk_mq_run_hw_queues
  blk-mq: add blk_mq_init_allocated_queue and export blk_mq_register_disk
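The exit_aio() item above is the change shown in the fs/aio.c diff below: instead of declaring an on-stack completion per context and waiting for each one in turn, every context is killed up front against a single shared counter and the caller blocks once. As a rough userspace analogue (a minimal sketch only, using POSIX threads rather than kernel completions; rq_wait, drain_ctx, wait_all and NR_CTX are illustrative names, not part of the patch):

/*
 * Userspace sketch (not kernel code) of the batched-wait pattern from the
 * exit_aio() fix: one shared counter + one wait, instead of a separate
 * on-stack completion and wait per context.
 */
#include <pthread.h>
#include <stdio.h>

struct rq_wait {
	pthread_mutex_t lock;
	pthread_cond_t  comp;	/* stands in for struct completion */
	int             count;	/* contexts still draining */
};

static struct rq_wait wait_all = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.comp = PTHREAD_COND_INITIALIZER,
};

/* Runs once per context; models the last in-flight request finishing. */
static void *drain_ctx(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wait_all.lock);
	if (--wait_all.count == 0)			/* atomic_dec_and_test() */
		pthread_cond_signal(&wait_all.comp);	/* complete() */
	pthread_mutex_unlock(&wait_all.lock);
	return NULL;
}

int main(void)
{
	enum { NR_CTX = 4 };
	pthread_t tids[NR_CTX];
	int i;

	wait_all.count = NR_CTX;	/* atomic_set(&wait.count, table->nr) */

	/* Kick off teardown of every context up front... */
	for (i = 0; i < NR_CTX; i++)
		pthread_create(&tids[i], NULL, drain_ctx, NULL);

	/* ...then block exactly once for all of them together. */
	pthread_mutex_lock(&wait_all.lock);
	while (wait_all.count)
		pthread_cond_wait(&wait_all.comp, &wait_all.lock);
	pthread_mutex_unlock(&wait_all.lock);

	for (i = 0; i < NR_CTX; i++)
		pthread_join(tids[i], NULL);

	printf("all %d contexts drained with a single wait\n", NR_CTX);
	return 0;
}

With the old per-context wait, exiting N contexts meant N sequential waits (the serialization and exposed RCU grace periods the message refers to); with the shared counter the teardown of all contexts overlaps and only the slowest one determines how long exit_group() blocks.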
Diffstat (limited to 'fs/aio.c')
-rw-r--r--	fs/aio.c	45
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 5785c4b58fea..fa8b16f47f1a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
 	unsigned		reqs_available;
 };
 
+struct ctx_rq_wait {
+	struct completion comp;
+	atomic_t count;
+};
+
 struct kioctx {
 	struct percpu_ref	users;
 	atomic_t		dead;
@@ -115,7 +120,7 @@ struct kioctx {
 	/*
 	 * signals when all in-flight requests are done
 	 */
-	struct completion *requests_done;
+	struct ctx_rq_wait	*rq_wait;
 
 	struct {
 		/*
@@ -572,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
 
 	/* At this point we know that there are no any in-flight requests */
-	if (ctx->requests_done)
-		complete(ctx->requests_done);
+	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
+		complete(&ctx->rq_wait->comp);
 
 	INIT_WORK(&ctx->free_work, free_ioctx);
 	schedule_work(&ctx->free_work);
@@ -783,7 +788,7 @@ err:
  * the rapid destruction of the kioctx.
  */
 static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
-		struct completion *requests_done)
+		      struct ctx_rq_wait *wait)
 {
 	struct kioctx_table *table;
 
@@ -813,7 +818,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	if (ctx->mmap_size)
 		vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
-	ctx->requests_done = requests_done;
+	ctx->rq_wait = wait;
 	percpu_ref_kill(&ctx->users);
 	return 0;
 }
@@ -829,18 +834,24 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 void exit_aio(struct mm_struct *mm)
 {
 	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
-	int i;
+	struct ctx_rq_wait wait;
+	int i, skipped;
 
 	if (!table)
 		return;
 
+	atomic_set(&wait.count, table->nr);
+	init_completion(&wait.comp);
+
+	skipped = 0;
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
-		if (!ctx)
+		if (!ctx) {
+			skipped++;
 			continue;
+		}
+
 		/*
 		 * We don't need to bother with munmap() here - exit_mmap(mm)
 		 * is coming and it'll unmap everything. And we simply can't,
@@ -849,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, &requests_done);
+		kill_ioctx(mm, ctx, &wait);
+	}
 
+	if (!atomic_sub_and_test(skipped, &wait.count)) {
 		/* Wait until all IO for the context are done. */
-		wait_for_completion(&requests_done);
+		wait_for_completion(&wait.comp);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -1331,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		struct ctx_rq_wait wait;
 		int ret;
 
+		init_completion(&wait.comp);
+		atomic_set(&wait.count, 1);
+
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		ret = kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &wait);
 		percpu_ref_put(&ioctx->users);
 
 		/* Wait until all IO for the context are done. Otherwise kernel
@@ -1347,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 		 * is destroyed.
 		 */
 		if (!ret)
-			wait_for_completion(&requests_done);
+			wait_for_completion(&wait.comp);
 
 		return ret;
 	}
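A note on the skipped accounting in the exit_aio() hunks above: wait.count is armed for every slot in the table (table->nr), but NULL slots never reach free_ioctx_reqs() and so never decrement it. The atomic_sub_and_test(skipped, &wait.count) call drops those slots in one step, and if that alone brings the counter to zero there is nothing left to wait for. A minimal standalone sketch of that arithmetic (plain C, with a hypothetical sub_and_test() helper standing in for the kernel's atomic_sub_and_test()):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for atomic_sub_and_test(): subtract and report "now zero". */
static bool sub_and_test(int sub, int *count)
{
	*count -= sub;
	return *count == 0;
}

int main(void)
{
	int nr = 8;			/* table->nr: slots in the ioctx table */
	int live = 5;			/* slots that hold a real kioctx */
	int skipped = nr - live;	/* NULL slots tallied by the loop */
	int count = nr;			/* atomic_set(&wait.count, table->nr) */

	/* Each live context decrements 'count' once when it finishes
	 * draining; the skipped slots never will, so drop them here. */
	if (!sub_and_test(skipped, &count))
		printf("still waiting on %d contexts\n", count);
	else
		printf("nothing to wait for\n");

	return 0;
}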