diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-16 21:49:16 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-16 21:49:16 -0400 |
| commit | d82312c80860b8b83cd4473ac6eafd244e712061 (patch) | |
| tree | 028b2e843e9d59d35aeb8924582864f18aa4ca36 /block | |
| parent | 7d69cff26ceadce8638cb65191285932a3de3d4c (diff) | |
| parent | 889fa31f00b218a2cef96c32a6b3f57e6d3bf918 (diff) | |
Merge branch 'for-4.1/core' of git://git.kernel.dk/linux-block
Pull block layer core bits from Jens Axboe:
"This is the core pull request for 4.1. Not a lot of stuff in here for
this round, mostly little fixes or optimizations. This pull request
contains:
- An optimization that speeds up queue runs on blk-mq, especially for
the case where there's a large difference between nr_cpu_ids and
the actual mapped software queues on a hardware queue. From Chong
Yuan.
- Honor node local allocations for requests on legacy devices. From
David Rientjes.
- Cleanup of blk_mq_rq_to_pdu() from me.
- exit_aio() fixup from me, greatly speeding up exiting multiple IO
contexts off exit_group(). For my particular test case, fio exit
took ~6 seconds before the fix. This was a typical case of both
exposing RCU grace periods to user space and serializing the exit
of the contexts.
- Make blk_mq_queue_enter() honor the gfp mask passed in, so we only
wait if __GFP_WAIT is set. From Keith Busch.
- blk-mq exports and two added helpers from Mike Snitzer, which will
be used by the dm-mq code.
- Cleanups of blk-mq queue init from Wei Fang and Xiaoguang Wang"
* 'for-4.1/core' of git://git.kernel.dk/linux-block:
  - blk-mq: reduce unnecessary software queue looping
  - aio: fix serial draining in exit_aio()
  - blk-mq: cleanup blk_mq_rq_to_pdu()
  - blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()
  - block: remove redundant check about 'set->nr_hw_queues' in blk_mq_alloc_tag_set()
  - block: allocate request memory local to request queue
  - blk-mq: don't wait in blk_mq_queue_enter() if __GFP_WAIT isn't set
  - blk-mq: export blk_mq_run_hw_queues
  - blk-mq: add blk_mq_init_allocated_queue and export blk_mq_register_disk
Diffstat (limited to 'block')
| -rw-r--r-- | block/blk-core.c | 19 | ||||
| -rw-r--r-- | block/blk-mq-sysfs.c | 1 | ||||
| -rw-r--r-- | block/blk-mq.c | 67 |
3 files changed, 58 insertions, 29 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 794c3e7f01cf..fd154b94447a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
| @@ -557,6 +557,18 @@ void blk_cleanup_queue(struct request_queue *q) | |||
| 557 | } | 557 | } |
| 558 | EXPORT_SYMBOL(blk_cleanup_queue); | 558 | EXPORT_SYMBOL(blk_cleanup_queue); |
| 559 | 559 | ||
| 560 | /* Allocate memory local to the request queue */ | ||
| 561 | static void *alloc_request_struct(gfp_t gfp_mask, void *data) | ||
| 562 | { | ||
| 563 | int nid = (int)(long)data; | ||
| 564 | return kmem_cache_alloc_node(request_cachep, gfp_mask, nid); | ||
| 565 | } | ||
| 566 | |||
| 567 | static void free_request_struct(void *element, void *unused) | ||
| 568 | { | ||
| 569 | kmem_cache_free(request_cachep, element); | ||
| 570 | } | ||
| 571 | |||
| 560 | int blk_init_rl(struct request_list *rl, struct request_queue *q, | 572 | int blk_init_rl(struct request_list *rl, struct request_queue *q, |
| 561 | gfp_t gfp_mask) | 573 | gfp_t gfp_mask) |
| 562 | { | 574 | { |
| @@ -569,9 +581,10 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, | |||
| 569 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); | 581 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
| 570 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); | 582 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
| 571 | 583 | ||
| 572 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 584 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct, |
| 573 | mempool_free_slab, request_cachep, | 585 | free_request_struct, |
| 574 | gfp_mask, q->node); | 586 | (void *)(long)q->node, gfp_mask, |
| 587 | q->node); | ||
| 575 | if (!rl->rq_pool) | 588 | if (!rl->rq_pool) |
| 576 | return -ENOMEM; | 589 | return -ENOMEM; |
| 577 | 590 | ||
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 1630a20d5dcf..b79685e06b70 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c | |||
| @@ -436,6 +436,7 @@ int blk_mq_register_disk(struct gendisk *disk) | |||
| 436 | 436 | ||
| 437 | return 0; | 437 | return 0; |
| 438 | } | 438 | } |
| 439 | EXPORT_SYMBOL_GPL(blk_mq_register_disk); | ||
| 439 | 440 | ||
| 440 | void blk_mq_sysfs_unregister(struct request_queue *q) | 441 | void blk_mq_sysfs_unregister(struct request_queue *q) |
| 441 | { | 442 | { |
diff --git a/block/blk-mq.c b/block/blk-mq.c index 33c428530193..c82de08f3721 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
| @@ -33,7 +33,6 @@ static DEFINE_MUTEX(all_q_mutex); | |||
| 33 | static LIST_HEAD(all_q_list); | 33 | static LIST_HEAD(all_q_list); |
| 34 | 34 | ||
| 35 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); | 35 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); |
| 36 | static void blk_mq_run_queues(struct request_queue *q); | ||
| 37 | 36 | ||
| 38 | /* | 37 | /* |
| 39 | * Check if any of the ctx's have pending work in this hardware queue | 38 | * Check if any of the ctx's have pending work in this hardware queue |
| @@ -78,7 +77,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, | |||
| 78 | clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); | 77 | clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); |
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | static int blk_mq_queue_enter(struct request_queue *q) | 80 | static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp) |
| 82 | { | 81 | { |
| 83 | while (true) { | 82 | while (true) { |
| 84 | int ret; | 83 | int ret; |
| @@ -86,6 +85,9 @@ static int blk_mq_queue_enter(struct request_queue *q) | |||
| 86 | if (percpu_ref_tryget_live(&q->mq_usage_counter)) | 85 | if (percpu_ref_tryget_live(&q->mq_usage_counter)) |
| 87 | return 0; | 86 | return 0; |
| 88 | 87 | ||
| 88 | if (!(gfp & __GFP_WAIT)) | ||
| 89 | return -EBUSY; | ||
| 90 | |||
| 89 | ret = wait_event_interruptible(q->mq_freeze_wq, | 91 | ret = wait_event_interruptible(q->mq_freeze_wq, |
| 90 | !q->mq_freeze_depth || blk_queue_dying(q)); | 92 | !q->mq_freeze_depth || blk_queue_dying(q)); |
| 91 | if (blk_queue_dying(q)) | 93 | if (blk_queue_dying(q)) |
| @@ -118,7 +120,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q) | |||
| 118 | 120 | ||
| 119 | if (freeze) { | 121 | if (freeze) { |
| 120 | percpu_ref_kill(&q->mq_usage_counter); | 122 | percpu_ref_kill(&q->mq_usage_counter); |
| 121 | blk_mq_run_queues(q); | 123 | blk_mq_run_hw_queues(q, false); |
| 122 | } | 124 | } |
| 123 | } | 125 | } |
| 124 | EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); | 126 | EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); |
| @@ -257,7 +259,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, | |||
| 257 | struct blk_mq_alloc_data alloc_data; | 259 | struct blk_mq_alloc_data alloc_data; |
| 258 | int ret; | 260 | int ret; |
| 259 | 261 | ||
| 260 | ret = blk_mq_queue_enter(q); | 262 | ret = blk_mq_queue_enter(q, gfp); |
| 261 | if (ret) | 263 | if (ret) |
| 262 | return ERR_PTR(ret); | 264 | return ERR_PTR(ret); |
| 263 | 265 | ||
| @@ -904,7 +906,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | |||
| 904 | &hctx->run_work, 0); | 906 | &hctx->run_work, 0); |
| 905 | } | 907 | } |
| 906 | 908 | ||
| 907 | static void blk_mq_run_queues(struct request_queue *q) | 909 | void blk_mq_run_hw_queues(struct request_queue *q, bool async) |
| 908 | { | 910 | { |
| 909 | struct blk_mq_hw_ctx *hctx; | 911 | struct blk_mq_hw_ctx *hctx; |
| 910 | int i; | 912 | int i; |
| @@ -915,9 +917,10 @@ static void blk_mq_run_queues(struct request_queue *q) | |||
| 915 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) | 917 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) |
| 916 | continue; | 918 | continue; |
| 917 | 919 | ||
| 918 | blk_mq_run_hw_queue(hctx, false); | 920 | blk_mq_run_hw_queue(hctx, async); |
| 919 | } | 921 | } |
| 920 | } | 922 | } |
| 923 | EXPORT_SYMBOL(blk_mq_run_hw_queues); | ||
| 921 | 924 | ||
| 922 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) | 925 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) |
| 923 | { | 926 | { |
| @@ -1186,7 +1189,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, | |||
| 1186 | int rw = bio_data_dir(bio); | 1189 | int rw = bio_data_dir(bio); |
| 1187 | struct blk_mq_alloc_data alloc_data; | 1190 | struct blk_mq_alloc_data alloc_data; |
| 1188 | 1191 | ||
| 1189 | if (unlikely(blk_mq_queue_enter(q))) { | 1192 | if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) { |
| 1190 | bio_endio(bio, -EIO); | 1193 | bio_endio(bio, -EIO); |
| 1191 | return NULL; | 1194 | return NULL; |
| 1192 | } | 1195 | } |
| @@ -1517,8 +1520,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) | |||
| 1517 | if (!bitmap->map) | 1520 | if (!bitmap->map) |
| 1518 | return -ENOMEM; | 1521 | return -ENOMEM; |
| 1519 | 1522 | ||
| 1520 | bitmap->map_size = num_maps; | ||
| 1521 | |||
| 1522 | total = nr_cpu_ids; | 1523 | total = nr_cpu_ids; |
| 1523 | for (i = 0; i < num_maps; i++) { | 1524 | for (i = 0; i < num_maps; i++) { |
| 1524 | bitmap->map[i].depth = min(total, bitmap->bits_per_word); | 1525 | bitmap->map[i].depth = min(total, bitmap->bits_per_word); |
| @@ -1759,8 +1760,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, | |||
| 1759 | continue; | 1760 | continue; |
| 1760 | 1761 | ||
| 1761 | hctx = q->mq_ops->map_queue(q, i); | 1762 | hctx = q->mq_ops->map_queue(q, i); |
| 1762 | cpumask_set_cpu(i, hctx->cpumask); | ||
| 1763 | hctx->nr_ctx++; | ||
| 1764 | 1763 | ||
| 1765 | /* | 1764 | /* |
| 1766 | * Set local node, IFF we have more than one hw queue. If | 1765 | * Set local node, IFF we have more than one hw queue. If |
| @@ -1797,6 +1796,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
| 1797 | } | 1796 | } |
| 1798 | 1797 | ||
| 1799 | queue_for_each_hw_ctx(q, hctx, i) { | 1798 | queue_for_each_hw_ctx(q, hctx, i) { |
| 1799 | struct blk_mq_ctxmap *map = &hctx->ctx_map; | ||
| 1800 | |||
| 1800 | /* | 1801 | /* |
| 1801 | * If no software queues are mapped to this hardware queue, | 1802 | * If no software queues are mapped to this hardware queue, |
| 1802 | * disable it and free the request entries. | 1803 | * disable it and free the request entries. |
| @@ -1813,6 +1814,13 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
| 1813 | } | 1814 | } |
| 1814 | 1815 | ||
| 1815 | /* | 1816 | /* |
| 1817 | * Set the map size to the number of mapped software queues. | ||
| 1818 | * This is more accurate and more efficient than looping | ||
| 1819 | * over all possibly mapped software queues. | ||
| 1820 | */ | ||
| 1821 | map->map_size = hctx->nr_ctx / map->bits_per_word; | ||
| 1822 | |||
| 1823 | /* | ||
| 1816 | * Initialize batch roundrobin counts | 1824 | * Initialize batch roundrobin counts |
| 1817 | */ | 1825 | */ |
| 1818 | hctx->next_cpu = cpumask_first(hctx->cpumask); | 1826 | hctx->next_cpu = cpumask_first(hctx->cpumask); |
| @@ -1889,9 +1897,25 @@ void blk_mq_release(struct request_queue *q) | |||
| 1889 | 1897 | ||
| 1890 | struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) | 1898 | struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) |
| 1891 | { | 1899 | { |
| 1900 | struct request_queue *uninit_q, *q; | ||
| 1901 | |||
| 1902 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); | ||
| 1903 | if (!uninit_q) | ||
| 1904 | return ERR_PTR(-ENOMEM); | ||
| 1905 | |||
| 1906 | q = blk_mq_init_allocated_queue(set, uninit_q); | ||
| 1907 | if (IS_ERR(q)) | ||
| 1908 | blk_cleanup_queue(uninit_q); | ||
| 1909 | |||
| 1910 | return q; | ||
| 1911 | } | ||
| 1912 | EXPORT_SYMBOL(blk_mq_init_queue); | ||
| 1913 | |||
| 1914 | struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, | ||
| 1915 | struct request_queue *q) | ||
| 1916 | { | ||
| 1892 | struct blk_mq_hw_ctx **hctxs; | 1917 | struct blk_mq_hw_ctx **hctxs; |
| 1893 | struct blk_mq_ctx __percpu *ctx; | 1918 | struct blk_mq_ctx __percpu *ctx; |
| 1894 | struct request_queue *q; | ||
| 1895 | unsigned int *map; | 1919 | unsigned int *map; |
| 1896 | int i; | 1920 | int i; |
| 1897 | 1921 | ||
| @@ -1926,20 +1950,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) | |||
| 1926 | hctxs[i]->queue_num = i; | 1950 | hctxs[i]->queue_num = i; |
| 1927 | } | 1951 | } |
| 1928 | 1952 | ||
| 1929 | q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); | ||
| 1930 | if (!q) | ||
| 1931 | goto err_hctxs; | ||
| 1932 | |||
| 1933 | /* | 1953 | /* |
| 1934 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | 1954 | * Init percpu_ref in atomic mode so that it's faster to shutdown. |
| 1935 | * See blk_register_queue() for details. | 1955 | * See blk_register_queue() for details. |
| 1936 | */ | 1956 | */ |
| 1937 | if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, | 1957 | if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, |
| 1938 | PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) | 1958 | PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) |
| 1939 | goto err_mq_usage; | 1959 | goto err_hctxs; |
| 1940 | 1960 | ||
| 1941 | setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); | 1961 | setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); |
| 1942 | blk_queue_rq_timeout(q, 30000); | 1962 | blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000); |
| 1943 | 1963 | ||
| 1944 | q->nr_queues = nr_cpu_ids; | 1964 | q->nr_queues = nr_cpu_ids; |
| 1945 | q->nr_hw_queues = set->nr_hw_queues; | 1965 | q->nr_hw_queues = set->nr_hw_queues; |
| @@ -1965,9 +1985,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) | |||
| 1965 | else | 1985 | else |
| 1966 | blk_queue_make_request(q, blk_sq_make_request); | 1986 | blk_queue_make_request(q, blk_sq_make_request); |
| 1967 | 1987 | ||
| 1968 | if (set->timeout) | ||
| 1969 | blk_queue_rq_timeout(q, set->timeout); | ||
| 1970 | |||
| 1971 | /* | 1988 | /* |
| 1972 | * Do this after blk_queue_make_request() overrides it... | 1989 | * Do this after blk_queue_make_request() overrides it... |
| 1973 | */ | 1990 | */ |
| @@ -1979,7 +1996,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) | |||
| 1979 | blk_mq_init_cpu_queues(q, set->nr_hw_queues); | 1996 | blk_mq_init_cpu_queues(q, set->nr_hw_queues); |
| 1980 | 1997 | ||
| 1981 | if (blk_mq_init_hw_queues(q, set)) | 1998 | if (blk_mq_init_hw_queues(q, set)) |
| 1982 | goto err_mq_usage; | 1999 | goto err_hctxs; |
| 1983 | 2000 | ||
| 1984 | mutex_lock(&all_q_mutex); | 2001 | mutex_lock(&all_q_mutex); |
| 1985 | list_add_tail(&q->all_q_node, &all_q_list); | 2002 | list_add_tail(&q->all_q_node, &all_q_list); |
| @@ -1991,8 +2008,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) | |||
| 1991 | 2008 | ||
| 1992 | return q; | 2009 | return q; |
| 1993 | 2010 | ||
| 1994 | err_mq_usage: | ||
| 1995 | blk_cleanup_queue(q); | ||
| 1996 | err_hctxs: | 2011 | err_hctxs: |
| 1997 | kfree(map); | 2012 | kfree(map); |
| 1998 | for (i = 0; i < set->nr_hw_queues; i++) { | 2013 | for (i = 0; i < set->nr_hw_queues; i++) { |
| @@ -2007,7 +2022,7 @@ err_percpu: | |||
| 2007 | free_percpu(ctx); | 2022 | free_percpu(ctx); |
| 2008 | return ERR_PTR(-ENOMEM); | 2023 | return ERR_PTR(-ENOMEM); |
| 2009 | } | 2024 | } |
| 2010 | EXPORT_SYMBOL(blk_mq_init_queue); | 2025 | EXPORT_SYMBOL(blk_mq_init_allocated_queue); |
| 2011 | 2026 | ||
| 2012 | void blk_mq_free_queue(struct request_queue *q) | 2027 | void blk_mq_free_queue(struct request_queue *q) |
| 2013 | { | 2028 | { |
| @@ -2159,7 +2174,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) | |||
| 2159 | if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) | 2174 | if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) |
| 2160 | return -EINVAL; | 2175 | return -EINVAL; |
| 2161 | 2176 | ||
| 2162 | if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue) | 2177 | if (!set->ops->queue_rq || !set->ops->map_queue) |
| 2163 | return -EINVAL; | 2178 | return -EINVAL; |
| 2164 | 2179 | ||
| 2165 | if (set->queue_depth > BLK_MQ_MAX_DEPTH) { | 2180 | if (set->queue_depth > BLK_MQ_MAX_DEPTH) { |
