author     Linus Torvalds <torvalds@linux-foundation.org>   2015-04-16 21:49:16 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-04-16 21:49:16 -0400
commit     d82312c80860b8b83cd4473ac6eafd244e712061
tree       028b2e843e9d59d35aeb8924582864f18aa4ca36
parent     7d69cff26ceadce8638cb65191285932a3de3d4c
parent     889fa31f00b218a2cef96c32a6b3f57e6d3bf918
Merge branch 'for-4.1/core' of git://git.kernel.dk/linux-block
Pull block layer core bits from Jens Axboe:
"This is the core pull request for 4.1. Not a lot of stuff in here for
this round, mostly little fixes or optimizations. This pull request
contains:
- An optimization that speeds up queue runs on blk-mq, especially for
the case where there's a large difference between nr_cpu_ids and
the actual mapped software queues on a hardware queue. From Chong
Yuan.
- Honor node local allocations for requests on legacy devices. From
David Rientjes.
- Cleanup of blk_mq_rq_to_pdu() from me.
- exit_aio() fixup from me, greatly speeding up exiting multiple IO
contexts off exit_group(). For my particular test case, fio exit
took ~6 seconds. A typical case of both exposing RCU grace periods
to user space, and serializing exit of them.
- Make blk_mq_queue_enter() honor the gfp mask passed in, so we only
wait if __GFP_WAIT is set. From Keith Busch.
- blk-mq exports and two added helpers from Mike Snitzer, which will
be used by the dm-mq code.
- Cleanups of blk-mq queue init from Wei Fang and Xiaoguang Wang"
* 'for-4.1/core' of git://git.kernel.dk/linux-block:
blk-mq: reduce unnecessary software queue looping
aio: fix serial draining in exit_aio()
blk-mq: cleanup blk_mq_rq_to_pdu()
blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()
block: remove redundant check about 'set->nr_hw_queues' in blk_mq_alloc_tag_set()
block: allocate request memory local to request queue
blk-mq: don't wait in blk_mq_queue_enter() if __GFP_WAIT isn't set
blk-mq: export blk_mq_run_hw_queues
blk-mq: add blk_mq_init_allocated_queue and export blk_mq_register_disk
 block/blk-core.c       | 19
 block/blk-mq-sysfs.c   |  1
 block/blk-mq.c         | 67
 fs/aio.c               | 45
 include/linux/blk-mq.h |  7
 5 files changed, 93 insertions, 46 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 794c3e7f01cf..fd154b94447a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -557,6 +557,18 @@ void blk_cleanup_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_cleanup_queue);

+/* Allocate memory local to the request queue */
+static void *alloc_request_struct(gfp_t gfp_mask, void *data)
+{
+	int nid = (int)(long)data;
+	return kmem_cache_alloc_node(request_cachep, gfp_mask, nid);
+}
+
+static void free_request_struct(void *element, void *unused)
+{
+	kmem_cache_free(request_cachep, element);
+}
+
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask)
 {
@@ -569,9 +581,10 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

-	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-					mempool_free_slab, request_cachep,
-					gfp_mask, q->node);
+	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct,
+					  free_request_struct,
+					  (void *)(long)q->node, gfp_mask,
+					  q->node);
 	if (!rl->rq_pool)
 		return -ENOMEM;

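For reference, the callback pattern above is generic mempool machinery: mempool_create_node() takes an alloc/free pair plus an opaque pool_data pointer, and the patch uses pool_data to carry the queue's NUMA node into kmem_cache_alloc_node(). A minimal sketch of the same pattern outside blk-core; the cache and function names here are hypothetical, not part of the patch:

#include <linux/mempool.h>
#include <linux/slab.h>

/* Hypothetical slab cache standing in for request_cachep above. */
static struct kmem_cache *example_cachep;

/* pool_data carries the NUMA node id, cast through a long. */
static void *example_alloc_node(gfp_t gfp_mask, void *data)
{
	int nid = (int)(long)data;

	return kmem_cache_alloc_node(example_cachep, gfp_mask, nid);
}

static void example_free_node(void *element, void *unused)
{
	kmem_cache_free(example_cachep, element);
}

static mempool_t *example_create_pool(int min_nr, int node)
{
	/* min_nr pre-allocated elements, all taken from 'node'. */
	return mempool_create_node(min_nr, example_alloc_node,
				   example_free_node, (void *)(long)node,
				   GFP_KERNEL, node);
}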
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1630a20d5dcf..b79685e06b70 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -436,6 +436,7 @@ int blk_mq_register_disk(struct gendisk *disk)

 	return 0;
 }
+EXPORT_SYMBOL_GPL(blk_mq_register_disk);

 void blk_mq_sysfs_unregister(struct request_queue *q)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 33c428530193..c82de08f3721 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);

 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
-static void blk_mq_run_queues(struct request_queue *q);

 /*
  * Check if any of the ctx's have pending work in this hardware queue
@@ -78,7 +77,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }

-static int blk_mq_queue_enter(struct request_queue *q)
+static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 {
 	while (true) {
 		int ret;
@@ -86,6 +85,9 @@ static int blk_mq_queue_enter(struct request_queue *q)
 		if (percpu_ref_tryget_live(&q->mq_usage_counter))
 			return 0;

+		if (!(gfp & __GFP_WAIT))
+			return -EBUSY;
+
 		ret = wait_event_interruptible(q->mq_freeze_wq,
 				!q->mq_freeze_depth || blk_queue_dying(q));
 		if (blk_queue_dying(q))
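With the gfp mask honored, a caller that must not sleep gets an error back from queue enter instead of blocking on a frozen queue. A hedged sketch of what that looks like from the allocation side, assuming the 4.1-era blk_mq_alloc_request(q, rw, gfp, reserved) signature; the wrapper name is made up:

#include <linux/blk-mq.h>
#include <linux/err.h>

/*
 * Atomic-context request allocation: GFP_ATOMIC does not include
 * __GFP_WAIT, so a frozen queue now yields ERR_PTR(-EBUSY) instead of
 * sleeping in blk_mq_queue_enter().
 */
static struct request *example_try_get_request(struct request_queue *q, int rw)
{
	struct request *rq;

	rq = blk_mq_alloc_request(q, rw, GFP_ATOMIC, false);
	if (IS_ERR(rq))
		return NULL;	/* e.g. -EBUSY: caller retries later */

	return rq;
}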
@@ -118,7 +120,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)

 	if (freeze) {
 		percpu_ref_kill(&q->mq_usage_counter);
-		blk_mq_run_queues(q);
+		blk_mq_run_hw_queues(q, false);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
@@ -257,7 +259,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 	struct blk_mq_alloc_data alloc_data;
 	int ret;

-	ret = blk_mq_queue_enter(q);
+	ret = blk_mq_queue_enter(q, gfp);
 	if (ret)
 		return ERR_PTR(ret);

@@ -904,7 +906,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 			&hctx->run_work, 0);
 }

-static void blk_mq_run_queues(struct request_queue *q)
+void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
@@ -915,9 +917,10 @@ static void blk_mq_run_queues(struct request_queue *q)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;

-		blk_mq_run_hw_queue(hctx, false);
+		blk_mq_run_hw_queue(hctx, async);
 	}
 }
+EXPORT_SYMBOL(blk_mq_run_hw_queues);

 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
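The export above lets drivers kick every hardware queue of a request queue at once, for example after device resources free up. A minimal usage sketch; the callback name is hypothetical:

#include <linux/blk-mq.h>

/*
 * Restart dispatch on all hardware queues. async=true defers each run
 * to the hctx workqueue rather than running it in this context.
 */
static void example_resources_available(struct request_queue *q)
{
	blk_mq_run_hw_queues(q, true);
}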
@@ -1186,7 +1189,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	int rw = bio_data_dir(bio);
 	struct blk_mq_alloc_data alloc_data;

-	if (unlikely(blk_mq_queue_enter(q))) {
+	if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) {
 		bio_endio(bio, -EIO);
 		return NULL;
 	}
@@ -1517,8 +1520,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 	if (!bitmap->map)
 		return -ENOMEM;

-	bitmap->map_size = num_maps;
-
 	total = nr_cpu_ids;
 	for (i = 0; i < num_maps; i++) {
 		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
@@ -1759,8 +1760,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 			continue;

 		hctx = q->mq_ops->map_queue(q, i);
-		cpumask_set_cpu(i, hctx->cpumask);
-		hctx->nr_ctx++;

 		/*
 		 * Set local node, IFF we have more than one hw queue. If
@@ -1797,6 +1796,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}

 	queue_for_each_hw_ctx(q, hctx, i) {
+		struct blk_mq_ctxmap *map = &hctx->ctx_map;
+
 		/*
 		 * If no software queues are mapped to this hardware queue,
 		 * disable it and free the request entries.
@@ -1813,6 +1814,13 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		}

 		/*
+		 * Set the map size to the number of mapped software queues.
+		 * This is more accurate and more efficient than looping
+		 * over all possibly mapped software queues.
+		 */
+		map->map_size = hctx->nr_ctx / map->bits_per_word;
+
+		/*
 		 * Initialize batch roundrobin counts
 		 */
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
@@ -1889,9 +1897,25 @@ void blk_mq_release(struct request_queue *q)

 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
+	struct request_queue *uninit_q, *q;
+
+	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+	if (!uninit_q)
+		return ERR_PTR(-ENOMEM);
+
+	q = blk_mq_init_allocated_queue(set, uninit_q);
+	if (IS_ERR(q))
+		blk_cleanup_queue(uninit_q);
+
+	return q;
+}
+EXPORT_SYMBOL(blk_mq_init_queue);
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+						  struct request_queue *q)
+{
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx __percpu *ctx;
-	struct request_queue *q;
 	unsigned int *map;
 	int i;

@@ -1926,20 +1950,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		hctxs[i]->queue_num = i;
 	}

-	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
-	if (!q)
-		goto err_hctxs;
-
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
 	 * See blk_register_queue() for details.
 	 */
 	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
 			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
-		goto err_mq_usage;
+		goto err_hctxs;

 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
-	blk_queue_rq_timeout(q, 30000);
+	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000);

 	q->nr_queues = nr_cpu_ids;
 	q->nr_hw_queues = set->nr_hw_queues;
@@ -1965,9 +1985,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	else
 		blk_queue_make_request(q, blk_sq_make_request);

-	if (set->timeout)
-		blk_queue_rq_timeout(q, set->timeout);
-
 	/*
 	 * Do this after blk_queue_make_request() overrides it...
 	 */
@@ -1979,7 +1996,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);

 	if (blk_mq_init_hw_queues(q, set))
-		goto err_mq_usage;
+		goto err_hctxs;

 	mutex_lock(&all_q_mutex);
 	list_add_tail(&q->all_q_node, &all_q_list);
@@ -1991,8 +2008,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)

 	return q;

-err_mq_usage:
-	blk_cleanup_queue(q);
 err_hctxs:
 	kfree(map);
 	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2007,7 +2022,7 @@ err_percpu:
 	free_percpu(ctx);
 	return ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(blk_mq_init_allocated_queue);

 void blk_mq_free_queue(struct request_queue *q)
 {
@@ -2159,7 +2174,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;

-	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
+	if (!set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;

 	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
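The blk_mq_init_queue()/blk_mq_init_allocated_queue() split exists so that callers like the upcoming dm-mq code can allocate the bare queue themselves and only then bind it to a tag set. A sketch of that two-step pattern, assuming a tag set that has already gone through blk_mq_alloc_tag_set(); the wrapper name is made up:

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/err.h>

static struct request_queue *example_make_queue(struct blk_mq_tag_set *set)
{
	struct request_queue *uninit_q, *q;

	/* Step 1: allocate an empty queue on the tag set's home node. */
	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
	if (!uninit_q)
		return ERR_PTR(-ENOMEM);

	/* Step 2: wire the pre-allocated queue up to the tag set. */
	q = blk_mq_init_allocated_queue(set, uninit_q);
	if (IS_ERR(q))
		blk_cleanup_queue(uninit_q);

	return q;
}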
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
 	unsigned reqs_available;
 };

+struct ctx_rq_wait {
+	struct completion comp;
+	atomic_t count;
+};
+
 struct kioctx {
 	struct percpu_ref	users;
 	atomic_t		dead;
@@ -115,7 +120,7 @@ struct kioctx {
 	/*
 	 * signals when all in-flight requests are done
 	 */
-	struct completion *requests_done;
+	struct ctx_rq_wait	*rq_wait;

 	struct {
 		/*
@@ -572,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);

 	/* At this point we know that there are no any in-flight requests */
-	if (ctx->requests_done)
-		complete(ctx->requests_done);
+	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
+		complete(&ctx->rq_wait->comp);

 	INIT_WORK(&ctx->free_work, free_ioctx);
 	schedule_work(&ctx->free_work);
@@ -783,7 +788,7 @@ err:
  * the rapid destruction of the kioctx.
  */
 static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
-		      struct completion *requests_done)
+		      struct ctx_rq_wait *wait)
 {
 	struct kioctx_table *table;

@@ -813,7 +818,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	if (ctx->mmap_size)
 		vm_munmap(ctx->mmap_base, ctx->mmap_size);

-	ctx->requests_done = requests_done;
+	ctx->rq_wait = wait;
 	percpu_ref_kill(&ctx->users);
 	return 0;
 }
@@ -829,18 +834,24 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 void exit_aio(struct mm_struct *mm)
 {
 	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
-	int i;
+	struct ctx_rq_wait wait;
+	int i, skipped;

 	if (!table)
 		return;

+	atomic_set(&wait.count, table->nr);
+	init_completion(&wait.comp);
+
+	skipped = 0;
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);

-		if (!ctx)
+		if (!ctx) {
+			skipped++;
 			continue;
+		}
+
 		/*
 		 * We don't need to bother with munmap() here - exit_mmap(mm)
 		 * is coming and it'll unmap everything. And we simply can't,
@@ -849,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, &requests_done);
+		kill_ioctx(mm, ctx, &wait);
+	}

+	if (!atomic_sub_and_test(skipped, &wait.count)) {
 		/* Wait until all IO for the context are done. */
-		wait_for_completion(&requests_done);
+		wait_for_completion(&wait.comp);
 	}

 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -1331,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		struct completion requests_done =
-			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		struct ctx_rq_wait wait;
 		int ret;

+		init_completion(&wait.comp);
+		atomic_set(&wait.count, 1);
+
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		ret = kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &wait);
 		percpu_ref_put(&ioctx->users);

 		/* Wait until all IO for the context are done. Otherwise kernel
@@ -1347,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 		 * is destroyed.
 		 */
 		if (!ret)
-			wait_for_completion(&requests_done);
+			wait_for_completion(&wait.comp);

 		return ret;
 	}
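The aio fix boils down to replacing one on-stack completion per kioctx with a single completion plus an atomic countdown, so every context drains in parallel and the exiting task sleeps once. The same pattern in isolation, as a sketch with made-up names:

#include <linux/atomic.h>
#include <linux/completion.h>

/* One completion shared by N objects, counted down as each one finishes. */
struct example_multi_wait {
	struct completion comp;
	atomic_t count;
};

static void example_multi_wait_init(struct example_multi_wait *mw, int nr)
{
	atomic_set(&mw->count, nr);
	init_completion(&mw->comp);
}

/* Called from each object's teardown path (cf. free_ioctx_reqs()). */
static void example_multi_wait_done(struct example_multi_wait *mw)
{
	if (atomic_dec_and_test(&mw->count))
		complete(&mw->comp);
}

/* Waiter side: forgive 'skipped' slots that never started, then block once. */
static void example_multi_wait(struct example_multi_wait *mw, int skipped)
{
	if (!atomic_sub_and_test(skipped, &mw->count))
		wait_for_completion(&mw->comp);
}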
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7aec86127335..8210e8797c12 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -164,6 +164,8 @@ enum {
 		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+						  struct request_queue *q);
 void blk_mq_finish_init(struct request_queue *q);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
@@ -218,6 +220,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
 		void *priv);
@@ -227,7 +230,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q);

 /*
  * Driver command data is immediately after the request. So subtract request
- * size to get back to the original request.
+ * size to get back to the original request, add request size to get the PDU.
  */
 static inline struct request *blk_mq_rq_from_pdu(void *pdu)
 {
@@ -235,7 +238,7 @@ static inline struct request *blk_mq_rq_from_pdu(void *pdu)
 }
 static inline void *blk_mq_rq_to_pdu(struct request *rq)
 {
-	return (void *) rq + sizeof(*rq);
+	return rq + 1;
 }

 #define queue_for_each_hw_ctx(q, hctx, i)				\
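For context on the PDU helpers: the PDU only exists when the driver reserves space for it through the tag set's cmd_size, in which case it is laid out immediately after struct request. A hedged illustration with made-up driver names; only blk_mq_rq_to_pdu()/blk_mq_rq_from_pdu(), cmd_size and blk_mq_alloc_tag_set() come from the real API:

#include <linux/blk-mq.h>

/* Hypothetical per-request driver data, allocated right behind struct request. */
struct example_cmd {
	int status;
};

static int example_setup_tag_set(struct blk_mq_tag_set *set)
{
	set->cmd_size = sizeof(struct example_cmd);	/* reserve PDU space */
	return blk_mq_alloc_tag_set(set);
}

static void example_complete(struct request *rq)
{
	/* 'rq + 1' is the PDU, exactly as blk_mq_rq_to_pdu() now spells out. */
	struct example_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->status = 0;
	WARN_ON(blk_mq_rq_from_pdu(cmd) != rq);	/* and back again */
}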