author    | Jens Axboe <axboe@fb.com> | 2014-09-10 11:02:03 -0400
committer | Jens Axboe <axboe@fb.com> | 2014-09-10 11:02:03 -0400
commit    | a516440542afcb9647f88d12c35640baf02d07ea
tree      | d6e1cdc5f18f6c8025d4fb6415e682bb0b60dd29
parent    | df35c7c912fe668797681842b3b74c61b0664050
blk-mq: scale depth and rq map appropriately if low on memory
If we are running in a kdump environment, resources are scarce.
For some SCSI setups with a huge set of shared tags, we run out
of memory allocating what the driver is asking for. So implement
scale-back logic to reduce the tag depth in those cases, allowing
the driver to load successfully.

We should extend this to detect low-memory situations in general,
and implement a sane fallback for those (1 queue, 64 tags, or
something like that).
Tested-by: Robert Elliott <elliott@hp.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
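The fallback described above is essentially a retry loop: attempt the allocation at the requested depth, halve the depth on failure, and give up once the depth drops below a usable minimum. Below is a minimal userspace sketch of that loop; `try_alloc_at_depth`, `alloc_with_scale_back`, `MIN_DEPTH`, and the simulated memory budget are illustrative stand-ins, not part of the kernel patch.

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for set->reserved_tags + BLK_MQ_TAG_MIN in the patch below. */
#define MIN_DEPTH 4U

/*
 * Pretend allocator: succeeds only when the requested depth fits the
 * simulated memory budget.  In the real patch this role is played by
 * __blk_mq_alloc_rq_maps().
 */
static int try_alloc_at_depth(unsigned int depth, unsigned int budget)
{
	return depth <= budget ? 0 : -1;
}

/*
 * Halve the depth on each failure until an allocation succeeds or the
 * depth falls below the usable minimum, mirroring the retry loop the
 * patch adds in blk_mq_alloc_rq_maps().
 */
static int alloc_with_scale_back(unsigned int *depth, unsigned int budget)
{
	unsigned int asked = *depth;

	while (*depth >= MIN_DEPTH) {
		if (!try_alloc_at_depth(*depth, budget)) {
			if (*depth != asked)
				printf("reduced tag depth (%u -> %u)\n",
				       asked, *depth);
			return 0;
		}
		*depth >>= 1;
	}

	fprintf(stderr, "failed to allocate request map\n");
	return -1;
}

int main(void)
{
	unsigned int depth = 256;

	/* Simulate a kdump-like environment that can only back 40 tags. */
	return alloc_with_scale_back(&depth, 40) ? EXIT_FAILURE : EXIT_SUCCESS;
}
```

In the patch itself, the equivalent loop lives in the new blk_mq_alloc_rq_maps(), which halves set->queue_depth and bails out once it would fall below set->reserved_tags + BLK_MQ_TAG_MIN.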
-rw-r--r-- | block/blk-mq.c | 88
1 file changed, 69 insertions, 19 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f9b85e83d9ba..383ea0cb1f0a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1321,6 +1321,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 				continue;
 			set->ops->exit_request(set->driver_data, tags->rqs[i],
 						hctx_idx, i);
+			tags->rqs[i] = NULL;
 		}
 	}
 
@@ -1354,8 +1355,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
 	INIT_LIST_HEAD(&tags->page_list);
 
-	tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
-				 GFP_KERNEL, set->numa_node);
+	tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+				 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+				 set->numa_node);
 	if (!tags->rqs) {
 		blk_mq_free_tags(tags);
 		return NULL;
@@ -1379,8 +1381,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		this_order--;
 
 		do {
-			page = alloc_pages_node(set->numa_node, GFP_KERNEL,
-						this_order);
+			page = alloc_pages_node(set->numa_node,
+				GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+				this_order);
 			if (page)
 				break;
 			if (!this_order--)
@@ -1404,8 +1407,10 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 			if (set->ops->init_request) {
 				if (set->ops->init_request(set->driver_data,
 						tags->rqs[i], hctx_idx, i,
-						set->numa_node))
+						set->numa_node)) {
+					tags->rqs[i] = NULL;
 					goto fail;
+				}
 			}
 
 			p += rq_size;
@@ -1416,7 +1421,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	return tags;
 
 fail:
-	pr_warn("%s: failed to allocate requests\n", __func__);
 	blk_mq_free_rq_map(set, tags, hctx_idx);
 	return NULL;
 }
@@ -1936,6 +1940,61 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+	int i;
+
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		set->tags[i] = blk_mq_init_rq_map(set, i);
+		if (!set->tags[i])
+			goto out_unwind;
+	}
+
+	return 0;
+
+out_unwind:
+	while (--i >= 0)
+		blk_mq_free_rq_map(set, set->tags[i], i);
+
+	set->tags = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Allocate the request maps associated with this tag_set. Note that this
+ * may reduce the depth asked for, if memory is tight. set->queue_depth
+ * will be updated to reflect the allocated depth.
+ */
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+	unsigned int depth;
+	int err;
+
+	depth = set->queue_depth;
+	do {
+		err = __blk_mq_alloc_rq_maps(set);
+		if (!err)
+			break;
+
+		set->queue_depth >>= 1;
+		if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
+			err = -ENOMEM;
+			break;
+		}
+	} while (set->queue_depth);
+
+	if (!set->queue_depth || err) {
+		pr_err("blk-mq: failed to allocate request map\n");
+		return -ENOMEM;
+	}
+
+	if (depth != set->queue_depth)
+		pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
+						depth, set->queue_depth);
+
+	return 0;
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -1944,8 +2003,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
  */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
-	int i;
-
 	if (!set->nr_hw_queues)
 		return -EINVAL;
 	if (!set->queue_depth)
@@ -1966,25 +2023,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 				 sizeof(struct blk_mq_tags *),
 				 GFP_KERNEL, set->numa_node);
 	if (!set->tags)
-		goto out;
+		return -ENOMEM;
 
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		set->tags[i] = blk_mq_init_rq_map(set, i);
-		if (!set->tags[i])
-			goto out_unwind;
-	}
+	if (blk_mq_alloc_rq_maps(set))
+		goto enomem;
 
 	mutex_init(&set->tag_list_lock);
 	INIT_LIST_HEAD(&set->tag_list);
 
 	return 0;
-
-out_unwind:
-	while (--i >= 0)
-		blk_mq_free_rq_map(set, set->tags[i], i);
-
+enomem:
 	kfree(set->tags);
 	set->tags = NULL;
-out:
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);