about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2014-09-10 11:02:03 -0400
committerJens Axboe <axboe@fb.com>2014-09-10 11:02:03 -0400
commita516440542afcb9647f88d12c35640baf02d07ea (patch)
treed6e1cdc5f18f6c8025d4fb6415e682bb0b60dd29
parentdf35c7c912fe668797681842b3b74c61b0664050 (diff)
blk-mq: scale depth and rq map appropriate if low on memory
If we are running in a kdump environment, resources are scarce. For some SCSI setups with a huge set of shared tags, we run out of memory allocating what the driver is asking for. So implement a scale-back logic to reduce the tag depth for those cases, allowing the driver to successfully load.

We should extend this to detect low memory situations, and implement a sane fallback for those (1 queue, 64 tags, or something like that).

Tested-by: Robert Elliott <elliott@hp.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--block/blk-mq.c88
1 file changed, 69 insertions, 19 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f9b85e83d9ba..383ea0cb1f0a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1321,6 +1321,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
1321 continue; 1321 continue;
1322 set->ops->exit_request(set->driver_data, tags->rqs[i], 1322 set->ops->exit_request(set->driver_data, tags->rqs[i],
1323 hctx_idx, i); 1323 hctx_idx, i);
1324 tags->rqs[i] = NULL;
1324 } 1325 }
1325 } 1326 }
1326 1327
@@ -1354,8 +1355,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1354 1355
1355 INIT_LIST_HEAD(&tags->page_list); 1356 INIT_LIST_HEAD(&tags->page_list);
1356 1357
1357 tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), 1358 tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
1358 GFP_KERNEL, set->numa_node); 1359 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
1360 set->numa_node);
1359 if (!tags->rqs) { 1361 if (!tags->rqs) {
1360 blk_mq_free_tags(tags); 1362 blk_mq_free_tags(tags);
1361 return NULL; 1363 return NULL;
@@ -1379,8 +1381,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1379 this_order--; 1381 this_order--;
1380 1382
1381 do { 1383 do {
1382 page = alloc_pages_node(set->numa_node, GFP_KERNEL, 1384 page = alloc_pages_node(set->numa_node,
1383 this_order); 1385 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
1386 this_order);
1384 if (page) 1387 if (page)
1385 break; 1388 break;
1386 if (!this_order--) 1389 if (!this_order--)
@@ -1404,8 +1407,10 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1404 if (set->ops->init_request) { 1407 if (set->ops->init_request) {
1405 if (set->ops->init_request(set->driver_data, 1408 if (set->ops->init_request(set->driver_data,
1406 tags->rqs[i], hctx_idx, i, 1409 tags->rqs[i], hctx_idx, i,
1407 set->numa_node)) 1410 set->numa_node)) {
1411 tags->rqs[i] = NULL;
1408 goto fail; 1412 goto fail;
1413 }
1409 } 1414 }
1410 1415
1411 p += rq_size; 1416 p += rq_size;
@@ -1416,7 +1421,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1416 return tags; 1421 return tags;
1417 1422
1418fail: 1423fail:
1419 pr_warn("%s: failed to allocate requests\n", __func__);
1420 blk_mq_free_rq_map(set, tags, hctx_idx); 1424 blk_mq_free_rq_map(set, tags, hctx_idx);
1421 return NULL; 1425 return NULL;
1422} 1426}
@@ -1936,6 +1940,61 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
1936 return NOTIFY_OK; 1940 return NOTIFY_OK;
1937} 1941}
1938 1942
1943static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
1944{
1945 int i;
1946
1947 for (i = 0; i < set->nr_hw_queues; i++) {
1948 set->tags[i] = blk_mq_init_rq_map(set, i);
1949 if (!set->tags[i])
1950 goto out_unwind;
1951 }
1952
1953 return 0;
1954
1955out_unwind:
1956 while (--i >= 0)
1957 blk_mq_free_rq_map(set, set->tags[i], i);
1958
1959 set->tags = NULL;
1960 return -ENOMEM;
1961}
1962
1963/*
1964 * Allocate the request maps associated with this tag_set. Note that this
1965 * may reduce the depth asked for, if memory is tight. set->queue_depth
1966 * will be updated to reflect the allocated depth.
1967 */
1968static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
1969{
1970 unsigned int depth;
1971 int err;
1972
1973 depth = set->queue_depth;
1974 do {
1975 err = __blk_mq_alloc_rq_maps(set);
1976 if (!err)
1977 break;
1978
1979 set->queue_depth >>= 1;
1980 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
1981 err = -ENOMEM;
1982 break;
1983 }
1984 } while (set->queue_depth);
1985
1986 if (!set->queue_depth || err) {
1987 pr_err("blk-mq: failed to allocate request map\n");
1988 return -ENOMEM;
1989 }
1990
1991 if (depth != set->queue_depth)
1992 pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
1993 depth, set->queue_depth);
1994
1995 return 0;
1996}
1997
1939/* 1998/*
1940 * Alloc a tag set to be associated with one or more request queues. 1999 * Alloc a tag set to be associated with one or more request queues.
1941 * May fail with EINVAL for various error conditions. May adjust the 2000 * May fail with EINVAL for various error conditions. May adjust the
@@ -1944,8 +2003,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
1944 */ 2003 */
1945int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) 2004int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
1946{ 2005{
1947 int i;
1948
1949 if (!set->nr_hw_queues) 2006 if (!set->nr_hw_queues)
1950 return -EINVAL; 2007 return -EINVAL;
1951 if (!set->queue_depth) 2008 if (!set->queue_depth)
@@ -1966,25 +2023,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
1966 sizeof(struct blk_mq_tags *), 2023 sizeof(struct blk_mq_tags *),
1967 GFP_KERNEL, set->numa_node); 2024 GFP_KERNEL, set->numa_node);
1968 if (!set->tags) 2025 if (!set->tags)
1969 goto out; 2026 return -ENOMEM;
1970 2027
1971 for (i = 0; i < set->nr_hw_queues; i++) { 2028 if (blk_mq_alloc_rq_maps(set))
1972 set->tags[i] = blk_mq_init_rq_map(set, i); 2029 goto enomem;
1973 if (!set->tags[i])
1974 goto out_unwind;
1975 }
1976 2030
1977 mutex_init(&set->tag_list_lock); 2031 mutex_init(&set->tag_list_lock);
1978 INIT_LIST_HEAD(&set->tag_list); 2032 INIT_LIST_HEAD(&set->tag_list);
1979 2033
1980 return 0; 2034 return 0;
1981 2035enomem:
1982out_unwind:
1983 while (--i >= 0)
1984 blk_mq_free_rq_map(set, set->tags[i], i);
1985 kfree(set->tags); 2036 kfree(set->tags);
1986 set->tags = NULL; 2037 set->tags = NULL;
1987out:
1988 return -ENOMEM; 2038 return -ENOMEM;
1989} 2039}
1990EXPORT_SYMBOL(blk_mq_alloc_tag_set); 2040EXPORT_SYMBOL(blk_mq_alloc_tag_set);