author     Linus Torvalds <torvalds@linux-foundation.org>   2015-04-16 21:49:16 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-04-16 21:49:16 -0400
commit     d82312c80860b8b83cd4473ac6eafd244e712061 (patch)
tree       028b2e843e9d59d35aeb8924582864f18aa4ca36 /block
parent     7d69cff26ceadce8638cb65191285932a3de3d4c (diff)
parent     889fa31f00b218a2cef96c32a6b3f57e6d3bf918 (diff)
Merge branch 'for-4.1/core' of git://git.kernel.dk/linux-block
Pull block layer core bits from Jens Axboe:
"This is the core pull request for 4.1. Not a lot of stuff in here for
this round, mostly little fixes or optimizations. This pull request
contains:
- An optimization that speeds up queue runs on blk-mq, especially for
the case where there's a large difference between nr_cpu_ids and
the actual mapped software queues on a hardware queue. From Chong
Yuan.
- Honor node local allocations for requests on legacy devices. From
David Rientjes.
- Cleanup of blk_mq_rq_to_pdu() from me.
- exit_aio() fixup from me, greatly speeding up exiting multiple IO
contexts off exit_group(). For my particular test case, fio exit
took ~6 seconds. A typical case of both exposing RCU grace periods
to user space, and serializing exit of them.
- Make blk_mq_queue_enter() honor the gfp mask passed in, so we only
wait if __GFP_WAIT is set. From Keith Busch.
- blk-mq exports and two added helpers from Mike Snitzer, which will
be used by the dm-mq code.
- Cleanups of blk-mq queue init from Wei Fang and Xiaoguang Wang"
* 'for-4.1/core' of git://git.kernel.dk/linux-block:
blk-mq: reduce unnecessary software queue looping
aio: fix serial draining in exit_aio()
blk-mq: cleanup blk_mq_rq_to_pdu()
blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()
block: remove redundant check about 'set->nr_hw_queues' in blk_mq_alloc_tag_set()
block: allocate request memory local to request queue
blk-mq: don't wait in blk_mq_queue_enter() if __GFP_WAIT isn't set
blk-mq: export blk_mq_run_hw_queues
blk-mq: add blk_mq_init_allocated_queue and export blk_mq_register_disk
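
The exports mentioned above (blk_mq_init_allocated_queue, blk_mq_register_disk, blk_mq_run_hw_queues) are aimed at stacked consumers such as the upcoming dm-mq code. A rough, hypothetical sketch of how a stacked driver that allocates its own request_queue might wire them together; the my_* names and struct layout are illustrative and not taken from dm:

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/err.h>

/* Hypothetical driver state -- illustrative only */
struct my_dev {
	struct request_queue *queue;	/* from an earlier blk_alloc_queue_node() */
	struct gendisk *disk;
};

static int my_init_stacked_mq(struct my_dev *md, struct blk_mq_tag_set *set)
{
	struct request_queue *q;

	/* finish blk-mq setup on the queue the driver allocated itself */
	q = blk_mq_init_allocated_queue(set, md->queue);
	if (IS_ERR(q))
		return PTR_ERR(q);

	/* mq sysfs registration, normally done by blk_register_queue() */
	blk_mq_register_disk(md->disk);

	/* restart all hardware queues, e.g. after the device comes back */
	blk_mq_run_hw_queues(md->queue, true);
	return 0;
}
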
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c     | 19
-rw-r--r--  block/blk-mq-sysfs.c |  1
-rw-r--r--  block/blk-mq.c       | 67
3 files changed, 58 insertions, 29 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 794c3e7f01cf..fd154b94447a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -557,6 +557,18 @@ void blk_cleanup_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
 
+/* Allocate memory local to the request queue */
+static void *alloc_request_struct(gfp_t gfp_mask, void *data)
+{
+	int nid = (int)(long)data;
+	return kmem_cache_alloc_node(request_cachep, gfp_mask, nid);
+}
+
+static void free_request_struct(void *element, void *unused)
+{
+	kmem_cache_free(request_cachep, element);
+}
+
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask)
 {
@@ -569,9 +581,10 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 
-	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-					  mempool_free_slab, request_cachep,
-					  gfp_mask, q->node);
+	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct,
+					  free_request_struct,
+					  (void *)(long)q->node, gfp_mask,
+					  q->node);
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
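
The alloc_request_struct()/free_request_struct() pair above is the standard mempool_create_node() callback pattern, with the NUMA node id passed through pool_data. A minimal stand-alone sketch of the same pattern, using a hypothetical my_* cache rather than the block layer's request_cachep:

#include <linux/mempool.h>
#include <linux/slab.h>

/* Hypothetical object and cache -- illustrative only */
struct my_obj {
	int tag;
};

static struct kmem_cache *my_obj_cache;

/* pool_data carries the NUMA node id, as the patch does with q->node */
static void *my_alloc_obj(gfp_t gfp_mask, void *data)
{
	int nid = (int)(long)data;

	return kmem_cache_alloc_node(my_obj_cache, gfp_mask, nid);
}

static void my_free_obj(void *element, void *unused)
{
	kmem_cache_free(my_obj_cache, element);
}

static mempool_t *my_create_pool(int min_nr, int nid)
{
	return mempool_create_node(min_nr, my_alloc_obj, my_free_obj,
				   (void *)(long)nid, GFP_KERNEL, nid);
}
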
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1630a20d5dcf..b79685e06b70 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -436,6 +436,7 @@ int blk_mq_register_disk(struct gendisk *disk)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(blk_mq_register_disk);
 
 void blk_mq_sysfs_unregister(struct request_queue *q)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 33c428530193..c82de08f3721 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
 
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
-static void blk_mq_run_queues(struct request_queue *q);
 
 /*
  * Check if any of the ctx's have pending work in this hardware queue
@@ -78,7 +77,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
-static int blk_mq_queue_enter(struct request_queue *q)
+static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 {
 	while (true) {
 		int ret;
@@ -86,6 +85,9 @@ static int blk_mq_queue_enter(struct request_queue *q)
 		if (percpu_ref_tryget_live(&q->mq_usage_counter))
 			return 0;
 
+		if (!(gfp & __GFP_WAIT))
+			return -EBUSY;
+
 		ret = wait_event_interruptible(q->mq_freeze_wq,
 				!q->mq_freeze_depth || blk_queue_dying(q));
 		if (blk_queue_dying(q))
@@ -118,7 +120,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 
 	if (freeze) {
 		percpu_ref_kill(&q->mq_usage_counter);
-		blk_mq_run_queues(q);
+		blk_mq_run_hw_queues(q, false);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
@@ -257,7 +259,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 	struct blk_mq_alloc_data alloc_data;
 	int ret;
 
-	ret = blk_mq_queue_enter(q);
+	ret = blk_mq_queue_enter(q, gfp);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -904,7 +906,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 			&hctx->run_work, 0);
 }
 
-static void blk_mq_run_queues(struct request_queue *q)
+void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
@@ -915,9 +917,10 @@ static void blk_mq_run_queues(struct request_queue *q)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
-		blk_mq_run_hw_queue(hctx, false);
+		blk_mq_run_hw_queue(hctx, async);
 	}
 }
+EXPORT_SYMBOL(blk_mq_run_hw_queues);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -1186,7 +1189,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	int rw = bio_data_dir(bio);
 	struct blk_mq_alloc_data alloc_data;
 
-	if (unlikely(blk_mq_queue_enter(q))) {
+	if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) {
 		bio_endio(bio, -EIO);
 		return NULL;
 	}
@@ -1517,8 +1520,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 	if (!bitmap->map)
 		return -ENOMEM;
 
-	bitmap->map_size = num_maps;
-
 	total = nr_cpu_ids;
 	for (i = 0; i < num_maps; i++) {
 		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
@@ -1759,8 +1760,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 			continue;
 
 		hctx = q->mq_ops->map_queue(q, i);
-		cpumask_set_cpu(i, hctx->cpumask);
-		hctx->nr_ctx++;
 
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
@@ -1797,6 +1796,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		struct blk_mq_ctxmap *map = &hctx->ctx_map;
+
 		/*
 		 * If no software queues are mapped to this hardware queue,
 		 * disable it and free the request entries.
@@ -1813,6 +1814,13 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		}
 
 		/*
+		 * Set the map size to the number of mapped software queues.
+		 * This is more accurate and more efficient than looping
+		 * over all possibly mapped software queues.
+		 */
+		map->map_size = hctx->nr_ctx / map->bits_per_word;
+
+		/*
 		 * Initialize batch roundrobin counts
 		 */
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
@@ -1889,9 +1897,25 @@ void blk_mq_release(struct request_queue *q)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
+	struct request_queue *uninit_q, *q;
+
+	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+	if (!uninit_q)
+		return ERR_PTR(-ENOMEM);
+
+	q = blk_mq_init_allocated_queue(set, uninit_q);
+	if (IS_ERR(q))
+		blk_cleanup_queue(uninit_q);
+
+	return q;
+}
+EXPORT_SYMBOL(blk_mq_init_queue);
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+						  struct request_queue *q)
+{
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx __percpu *ctx;
-	struct request_queue *q;
 	unsigned int *map;
 	int i;
 
@@ -1926,20 +1950,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		hctxs[i]->queue_num = i;
 	}
 
-	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
-	if (!q)
-		goto err_hctxs;
-
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
 	 * See blk_register_queue() for details.
 	 */
 	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
 			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
-		goto err_mq_usage;
+		goto err_hctxs;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
-	blk_queue_rq_timeout(q, 30000);
+	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000);
 
 	q->nr_queues = nr_cpu_ids;
 	q->nr_hw_queues = set->nr_hw_queues;
@@ -1965,9 +1985,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	else
 		blk_queue_make_request(q, blk_sq_make_request);
 
-	if (set->timeout)
-		blk_queue_rq_timeout(q, set->timeout);
-
 	/*
 	 * Do this after blk_queue_make_request() overrides it...
 	 */
@@ -1979,7 +1996,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
 	if (blk_mq_init_hw_queues(q, set))
-		goto err_mq_usage;
+		goto err_hctxs;
 
 	mutex_lock(&all_q_mutex);
 	list_add_tail(&q->all_q_node, &all_q_list);
@@ -1991,8 +2008,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	return q;
 
-err_mq_usage:
-	blk_cleanup_queue(q);
 err_hctxs:
 	kfree(map);
 	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2007,7 +2022,7 @@ err_percpu:
 	free_percpu(ctx);
 	return ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
 void blk_mq_free_queue(struct request_queue *q)
 {
@@ -2159,7 +2174,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
 
-	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
+	if (!set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
 	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
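
With blk_mq_queue_enter() now honoring the gfp mask, a request allocation without __GFP_WAIT comes back as -EBUSY while the queue is frozen instead of sleeping. A hedged sketch of what that looks like from a caller's side; my_try_get_request() is a hypothetical helper, not part of this series:

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/err.h>

static struct request *my_try_get_request(struct request_queue *q, int rw)
{
	struct request *rq;

	/* GFP_ATOMIC lacks __GFP_WAIT, so this no longer blocks on a frozen queue */
	rq = blk_mq_alloc_request(q, rw, GFP_ATOMIC, false);
	if (IS_ERR(rq)) {
		if (PTR_ERR(rq) == -EBUSY) {
			/* queue frozen or tearing down; back off and retry later */
		}
		return NULL;
	}
	return rq;
}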