author     Linus Torvalds <torvalds@linux-foundation.org>  2015-04-16 21:49:16 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-04-16 21:49:16 -0400
commit     d82312c80860b8b83cd4473ac6eafd244e712061
tree       028b2e843e9d59d35aeb8924582864f18aa4ca36
parent     7d69cff26ceadce8638cb65191285932a3de3d4c
parent     889fa31f00b218a2cef96c32a6b3f57e6d3bf918
Merge branch 'for-4.1/core' of git://git.kernel.dk/linux-block
Pull block layer core bits from Jens Axboe:
 "This is the core pull request for 4.1. Not a lot of stuff in here for
  this round, mostly little fixes or optimizations. This pull request
  contains:

   - An optimization that speeds up queue runs on blk-mq, especially for
     the case where there's a large difference between nr_cpu_ids and
     the actual mapped software queues on a hardware queue. From Chong
     Yuan.

   - Honor node local allocations for requests on legacy devices. From
     David Rientjes.

   - Cleanup of blk_mq_rq_to_pdu() from me.

   - exit_aio() fixup from me, greatly speeding up exiting multiple IO
     contexts off exit_group(). For my particular test case, fio exit
     took ~6 seconds. A typical case of both exposing RCU grace periods
     to user space, and serializing exit of them.

   - Make blk_mq_queue_enter() honor the gfp mask passed in, so we only
     wait if __GFP_WAIT is set. From Keith Busch.

   - blk-mq exports and two added helpers from Mike Snitzer, which will
     be used by the dm-mq code.

   - Cleanups of blk-mq queue init from Wei Fang and Xiaoguang Wang"

* 'for-4.1/core' of git://git.kernel.dk/linux-block:
  blk-mq: reduce unnecessary software queue looping
  aio: fix serial draining in exit_aio()
  blk-mq: cleanup blk_mq_rq_to_pdu()
  blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()
  block: remove redundant check about 'set->nr_hw_queues' in blk_mq_alloc_tag_set()
  block: allocate request memory local to request queue
  blk-mq: don't wait in blk_mq_queue_enter() if __GFP_WAIT isn't set
  blk-mq: export blk_mq_run_hw_queues
  blk-mq: add blk_mq_init_allocated_queue and export blk_mq_register_disk
-rw-r--r--  block/blk-core.c        | 19
-rw-r--r--  block/blk-mq-sysfs.c    |  1
-rw-r--r--  block/blk-mq.c          | 67
-rw-r--r--  fs/aio.c                | 45
-rw-r--r--  include/linux/blk-mq.h  |  7
5 files changed, 93 insertions(+), 46 deletions(-)
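The two interfaces added by this pull for the dm-mq work, blk_mq_init_allocated_queue() and the exported blk_mq_run_hw_queues(), appear in the hunks below. A hedged sketch of how a driver might use them, assuming a hypothetical my_init_queue() helper and an already populated blk_mq_tag_set; only the blk-mq calls themselves are taken from this diff, the surrounding error handling is illustrative.

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/err.h>

/* Hypothetical driver helper: pre-allocate the queue, then let blk-mq
 * initialize it (the same pattern blk_mq_init_queue() itself now follows). */
static struct request_queue *my_init_queue(struct blk_mq_tag_set *set)
{
	struct request_queue *uninit_q, *q;

	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
	if (!uninit_q)
		return ERR_PTR(-ENOMEM);

	q = blk_mq_init_allocated_queue(set, uninit_q);
	if (IS_ERR(q))
		blk_cleanup_queue(uninit_q);	/* caller still owns the raw queue */

	return q;
}

/* Later, e.g. once resources free up, kick every hardware queue at once;
 * the second argument selects asynchronous (workqueue) dispatch. */
static void my_kick_queues(struct request_queue *q)
{
	blk_mq_run_hw_queues(q, true);
}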
diff --git a/block/blk-core.c b/block/blk-core.c
index 794c3e7f01cf..fd154b94447a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -557,6 +557,18 @@ void blk_cleanup_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
 
+/* Allocate memory local to the request queue */
+static void *alloc_request_struct(gfp_t gfp_mask, void *data)
+{
+	int nid = (int)(long)data;
+	return kmem_cache_alloc_node(request_cachep, gfp_mask, nid);
+}
+
+static void free_request_struct(void *element, void *unused)
+{
+	kmem_cache_free(request_cachep, element);
+}
+
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask)
 {
@@ -569,9 +581,10 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 
-	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-					mempool_free_slab, request_cachep,
-					gfp_mask, q->node);
+	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct,
+					free_request_struct,
+					(void *)(long)q->node, gfp_mask,
+					q->node);
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
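The blk-core.c hunk above feeds mempool_create_node() a pair of custom callbacks so the legacy request structs come from the queue's home NUMA node: the node id rides in as pool_data and is handed to kmem_cache_alloc_node(). Restated outside the diff context as a generic, hedged sketch for a hypothetical my_cachep slab cache:

#include <linux/mempool.h>
#include <linux/slab.h>

static struct kmem_cache *my_cachep;	/* hypothetical cache, created elsewhere */

/* mempool_alloc_t: allocate one element on the node passed via pool_data */
static void *my_alloc_node(gfp_t gfp_mask, void *data)
{
	int nid = (int)(long)data;

	return kmem_cache_alloc_node(my_cachep, gfp_mask, nid);
}

/* mempool_free_t: return the element to the slab cache */
static void my_free_node(void *element, void *unused)
{
	kmem_cache_free(my_cachep, element);
}

static mempool_t *my_create_pool(int min_nr, int node)
{
	/* pool_data carries the node id; the pool itself is also placed on 'node' */
	return mempool_create_node(min_nr, my_alloc_node, my_free_node,
				   (void *)(long)node, GFP_KERNEL, node);
}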
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1630a20d5dcf..b79685e06b70 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -436,6 +436,7 @@ int blk_mq_register_disk(struct gendisk *disk)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(blk_mq_register_disk);
 
 void blk_mq_sysfs_unregister(struct request_queue *q)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 33c428530193..c82de08f3721 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
 
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
-static void blk_mq_run_queues(struct request_queue *q);
 
 /*
  * Check if any of the ctx's have pending work in this hardware queue
@@ -78,7 +77,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
-static int blk_mq_queue_enter(struct request_queue *q)
+static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 {
 	while (true) {
 		int ret;
@@ -86,6 +85,9 @@ static int blk_mq_queue_enter(struct request_queue *q)
 		if (percpu_ref_tryget_live(&q->mq_usage_counter))
 			return 0;
 
+		if (!(gfp & __GFP_WAIT))
+			return -EBUSY;
+
 		ret = wait_event_interruptible(q->mq_freeze_wq,
 				!q->mq_freeze_depth || blk_queue_dying(q));
 		if (blk_queue_dying(q))
@@ -118,7 +120,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 
 	if (freeze) {
 		percpu_ref_kill(&q->mq_usage_counter);
-		blk_mq_run_queues(q);
+		blk_mq_run_hw_queues(q, false);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
@@ -257,7 +259,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 	struct blk_mq_alloc_data alloc_data;
 	int ret;
 
-	ret = blk_mq_queue_enter(q);
+	ret = blk_mq_queue_enter(q, gfp);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -904,7 +906,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 			&hctx->run_work, 0);
 }
 
-static void blk_mq_run_queues(struct request_queue *q)
+void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
@@ -915,9 +917,10 @@ static void blk_mq_run_queues(struct request_queue *q)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
-		blk_mq_run_hw_queue(hctx, false);
+		blk_mq_run_hw_queue(hctx, async);
 	}
 }
+EXPORT_SYMBOL(blk_mq_run_hw_queues);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -1186,7 +1189,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	int rw = bio_data_dir(bio);
 	struct blk_mq_alloc_data alloc_data;
 
-	if (unlikely(blk_mq_queue_enter(q))) {
+	if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) {
 		bio_endio(bio, -EIO);
 		return NULL;
 	}
@@ -1517,8 +1520,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 	if (!bitmap->map)
 		return -ENOMEM;
 
-	bitmap->map_size = num_maps;
-
 	total = nr_cpu_ids;
 	for (i = 0; i < num_maps; i++) {
 		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
@@ -1759,8 +1760,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 			continue;
 
 		hctx = q->mq_ops->map_queue(q, i);
-		cpumask_set_cpu(i, hctx->cpumask);
-		hctx->nr_ctx++;
 
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
@@ -1797,6 +1796,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		struct blk_mq_ctxmap *map = &hctx->ctx_map;
+
 		/*
 		 * If no software queues are mapped to this hardware queue,
 		 * disable it and free the request entries.
@@ -1813,6 +1814,13 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		}
 
 		/*
+		 * Set the map size to the number of mapped software queues.
+		 * This is more accurate and more efficient than looping
+		 * over all possibly mapped software queues.
+		 */
+		map->map_size = hctx->nr_ctx / map->bits_per_word;
+
+		/*
 		 * Initialize batch roundrobin counts
 		 */
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
@@ -1889,9 +1897,25 @@ void blk_mq_release(struct request_queue *q)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
+	struct request_queue *uninit_q, *q;
+
+	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+	if (!uninit_q)
+		return ERR_PTR(-ENOMEM);
+
+	q = blk_mq_init_allocated_queue(set, uninit_q);
+	if (IS_ERR(q))
+		blk_cleanup_queue(uninit_q);
+
+	return q;
+}
+EXPORT_SYMBOL(blk_mq_init_queue);
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+						  struct request_queue *q)
+{
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx __percpu *ctx;
-	struct request_queue *q;
 	unsigned int *map;
 	int i;
 
@@ -1926,20 +1950,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		hctxs[i]->queue_num = i;
 	}
 
-	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
-	if (!q)
-		goto err_hctxs;
-
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
 	 * See blk_register_queue() for details.
 	 */
 	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
 			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
-		goto err_mq_usage;
+		goto err_hctxs;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
-	blk_queue_rq_timeout(q, 30000);
+	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000);
 
 	q->nr_queues = nr_cpu_ids;
 	q->nr_hw_queues = set->nr_hw_queues;
@@ -1965,9 +1985,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	else
 		blk_queue_make_request(q, blk_sq_make_request);
 
-	if (set->timeout)
-		blk_queue_rq_timeout(q, set->timeout);
-
 	/*
 	 * Do this after blk_queue_make_request() overrides it...
 	 */
@@ -1979,7 +1996,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
 	if (blk_mq_init_hw_queues(q, set))
-		goto err_mq_usage;
+		goto err_hctxs;
 
 	mutex_lock(&all_q_mutex);
 	list_add_tail(&q->all_q_node, &all_q_list);
@@ -1991,8 +2008,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	return q;
 
-err_mq_usage:
-	blk_cleanup_queue(q);
 err_hctxs:
 	kfree(map);
 	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2007,7 +2022,7 @@ err_percpu:
 	free_percpu(ctx);
 	return ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
 void blk_mq_free_queue(struct request_queue *q)
 {
@@ -2159,7 +2174,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
 
-	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
+	if (!set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
 	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
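With blk_mq_queue_enter() now checking the gfp mask, a request allocation that cannot sleep fails fast with -EBUSY while the queue is frozen instead of blocking in wait_event_interruptible(). A hedged caller-side sketch follows; the trailing 'reserved' flag of blk_mq_alloc_request() reflects the 4.1-era prototype and is an assumption here, not part of this diff.

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/err.h>

/* Hypothetical helper: try to grab a request from atomic context. */
static struct request *my_try_get_request(struct request_queue *q)
{
	struct request *rq;

	/* GFP_ATOMIC has no __GFP_WAIT, so a frozen queue now returns
	 * ERR_PTR(-EBUSY) immediately instead of sleeping. */
	rq = blk_mq_alloc_request(q, READ, GFP_ATOMIC, false);
	if (IS_ERR(rq))
		return NULL;	/* retry later, or fall back to GFP_KERNEL */

	return rq;
}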
diff --git a/fs/aio.c b/fs/aio.c
index 5785c4b58fea..fa8b16f47f1a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
 	unsigned reqs_available;
 };
 
+struct ctx_rq_wait {
+	struct completion comp;
+	atomic_t count;
+};
+
 struct kioctx {
 	struct percpu_ref users;
 	atomic_t dead;
@@ -115,7 +120,7 @@ struct kioctx {
 	/*
 	 * signals when all in-flight requests are done
 	 */
-	struct completion *requests_done;
+	struct ctx_rq_wait *rq_wait;
 
 	struct {
 		/*
@@ -572,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
 
 	/* At this point we know that there are no any in-flight requests */
-	if (ctx->requests_done)
-		complete(ctx->requests_done);
+	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
+		complete(&ctx->rq_wait->comp);
 
 	INIT_WORK(&ctx->free_work, free_ioctx);
 	schedule_work(&ctx->free_work);
@@ -783,7 +788,7 @@ err:
  * the rapid destruction of the kioctx.
  */
 static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
-		      struct completion *requests_done)
+		      struct ctx_rq_wait *wait)
 {
 	struct kioctx_table *table;
 
@@ -813,7 +818,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	if (ctx->mmap_size)
 		vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
-	ctx->requests_done = requests_done;
+	ctx->rq_wait = wait;
 	percpu_ref_kill(&ctx->users);
 	return 0;
 }
@@ -829,18 +834,24 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 void exit_aio(struct mm_struct *mm)
 {
 	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
-	int i;
+	struct ctx_rq_wait wait;
+	int i, skipped;
 
 	if (!table)
 		return;
 
+	atomic_set(&wait.count, table->nr);
+	init_completion(&wait.comp);
+
+	skipped = 0;
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
-		if (!ctx)
+		if (!ctx) {
+			skipped++;
 			continue;
+		}
+
 		/*
 		 * We don't need to bother with munmap() here - exit_mmap(mm)
 		 * is coming and it'll unmap everything. And we simply can't,
@@ -849,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, &requests_done);
+		kill_ioctx(mm, ctx, &wait);
+	}
 
-		/* Wait until all IO for the context are done. */
-		wait_for_completion(&requests_done);
+	if (!atomic_sub_and_test(skipped, &wait.count)) {
+		/* Wait until all IO for the context are done. */
+		wait_for_completion(&wait.comp);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -1331,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		struct ctx_rq_wait wait;
 		int ret;
 
+		init_completion(&wait.comp);
+		atomic_set(&wait.count, 1);
+
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		ret = kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &wait);
 		percpu_ref_put(&ioctx->users);
 
 		/* Wait until all IO for the context are done. Otherwise kernel
@@ -1347,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 		 * is destroyed.
 		 */
 		if (!ret)
-			wait_for_completion(&requests_done);
+			wait_for_completion(&wait.comp);
 
 		return ret;
 	}
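The exit_aio() change replaces one on-stack completion per kioctx with a single ctx_rq_wait: a shared completion plus an atomic count, where the last context to finish draining brings the count to zero and signals the waiter once. A minimal userspace analog of that "last one out signals" pattern, using C11 atomics and a POSIX semaphore in place of the kernel's completion (all names below are invented for illustration):

#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCTX 4

struct ctx_wait {
	sem_t comp;		/* stands in for struct completion */
	atomic_int count;	/* contexts still draining */
};

static struct ctx_wait wait_all;

static void *drain_ctx(void *arg)
{
	(void)arg;
	/* ... drain in-flight requests for this context ... */

	/* like atomic_dec_and_test(): only the last context signals the waiter */
	if (atomic_fetch_sub(&wait_all.count, 1) == 1)
		sem_post(&wait_all.comp);
	return NULL;
}

int main(void)
{
	pthread_t tids[NCTX];

	sem_init(&wait_all.comp, 0, 0);
	atomic_init(&wait_all.count, NCTX);

	for (int i = 0; i < NCTX; i++)
		pthread_create(&tids[i], NULL, drain_ctx, NULL);

	sem_wait(&wait_all.comp);	/* one wait for all contexts, as in exit_aio() */
	printf("all contexts drained\n");

	for (int i = 0; i < NCTX; i++)
		pthread_join(tids[i], NULL);
	return 0;
}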
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7aec86127335..8210e8797c12 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -164,6 +164,8 @@ enum {
 		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+						  struct request_queue *q);
 void blk_mq_finish_init(struct request_queue *q);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
@@ -218,6 +220,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
 		void *priv);
@@ -227,7 +230,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q);
 
 /*
  * Driver command data is immediately after the request. So subtract request
- * size to get back to the original request.
+ * size to get back to the original request, add request size to get the PDU.
  */
 static inline struct request *blk_mq_rq_from_pdu(void *pdu)
 {
@@ -235,7 +238,7 @@ static inline struct request *blk_mq_rq_from_pdu(void *pdu)
 }
 static inline void *blk_mq_rq_to_pdu(struct request *rq)
 {
-	return (void *) rq + sizeof(*rq);
+	return rq + 1;
 }
 
 #define queue_for_each_hw_ctx(q, hctx, i)				\
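The blk_mq_rq_to_pdu() cleanup relies on the driver PDU living immediately after struct request, so "rq + 1" and "(void *)rq + sizeof(*rq)" name the same address and the two helpers invert each other. A small standalone demonstration of that equivalence with stand-in request/PDU types (everything below is invented for illustration, not kernel code):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for struct request and a driver PDU laid out right behind it. */
struct fake_request { int tag; };
struct fake_pdu { char cmd[16]; };

/* Mirrors blk_mq_rq_to_pdu(): the PDU starts immediately after the request. */
static void *rq_to_pdu(struct fake_request *rq)
{
	return rq + 1;			/* same as (char *)rq + sizeof(*rq) */
}

/* Mirrors blk_mq_rq_from_pdu(): subtract the request size to get back. */
static struct fake_request *rq_from_pdu(void *pdu)
{
	return (struct fake_request *)((char *)pdu - sizeof(struct fake_request));
}

int main(void)
{
	/* One allocation holding request + PDU, as blk-mq does per tag. */
	struct fake_request *rq = malloc(sizeof(struct fake_request) +
					 sizeof(struct fake_pdu));
	if (!rq)
		return 1;

	void *pdu = rq_to_pdu(rq);
	assert(pdu == (char *)rq + sizeof(*rq));	/* old and new forms agree */
	assert(rq_from_pdu(pdu) == rq);			/* the two helpers invert */

	printf("rq=%p pdu=%p\n", (void *)rq, pdu);
	free(rq);
	return 0;
}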