diff options
| author | Jens Axboe <axboe@fb.com> | 2014-09-10 11:02:03 -0400 |
|---|---|---|
| committer | Jens Axboe <axboe@fb.com> | 2014-09-10 11:02:03 -0400 |
| commit | a516440542afcb9647f88d12c35640baf02d07ea (patch) | |
| tree | d6e1cdc5f18f6c8025d4fb6415e682bb0b60dd29 | |
| parent | df35c7c912fe668797681842b3b74c61b0664050 (diff) | |
blk-mq: scale depth and rq map appropriate if low on memory
If we are running in a kdump environment, resources are scarce.
For some SCSI setups with a huge set of shared tags, we run out
of memory allocating what the driver is asking for. So implement
scale-back logic to reduce the tag depth for those cases, allowing
the driver to successfully load.
We should extend this to detect low memory situations, and implement
a sane fallback for those (1 queue, 64 tags, or something like that).
Tested-by: Robert Elliott <elliott@hp.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
| -rw-r--r-- | block/blk-mq.c | 88 |
1 files changed, 69 insertions, 19 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index f9b85e83d9ba..383ea0cb1f0a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
| @@ -1321,6 +1321,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, | |||
| 1321 | continue; | 1321 | continue; |
| 1322 | set->ops->exit_request(set->driver_data, tags->rqs[i], | 1322 | set->ops->exit_request(set->driver_data, tags->rqs[i], |
| 1323 | hctx_idx, i); | 1323 | hctx_idx, i); |
| 1324 | tags->rqs[i] = NULL; | ||
| 1324 | } | 1325 | } |
| 1325 | } | 1326 | } |
| 1326 | 1327 | ||
| @@ -1354,8 +1355,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
| 1354 | 1355 | ||
| 1355 | INIT_LIST_HEAD(&tags->page_list); | 1356 | INIT_LIST_HEAD(&tags->page_list); |
| 1356 | 1357 | ||
| 1357 | tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), | 1358 | tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), |
| 1358 | GFP_KERNEL, set->numa_node); | 1359 | GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, |
| 1360 | set->numa_node); | ||
| 1359 | if (!tags->rqs) { | 1361 | if (!tags->rqs) { |
| 1360 | blk_mq_free_tags(tags); | 1362 | blk_mq_free_tags(tags); |
| 1361 | return NULL; | 1363 | return NULL; |
| @@ -1379,8 +1381,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
| 1379 | this_order--; | 1381 | this_order--; |
| 1380 | 1382 | ||
| 1381 | do { | 1383 | do { |
| 1382 | page = alloc_pages_node(set->numa_node, GFP_KERNEL, | 1384 | page = alloc_pages_node(set->numa_node, |
| 1383 | this_order); | 1385 | GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, |
| 1386 | this_order); | ||
| 1384 | if (page) | 1387 | if (page) |
| 1385 | break; | 1388 | break; |
| 1386 | if (!this_order--) | 1389 | if (!this_order--) |
| @@ -1404,8 +1407,10 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
| 1404 | if (set->ops->init_request) { | 1407 | if (set->ops->init_request) { |
| 1405 | if (set->ops->init_request(set->driver_data, | 1408 | if (set->ops->init_request(set->driver_data, |
| 1406 | tags->rqs[i], hctx_idx, i, | 1409 | tags->rqs[i], hctx_idx, i, |
| 1407 | set->numa_node)) | 1410 | set->numa_node)) { |
| 1411 | tags->rqs[i] = NULL; | ||
| 1408 | goto fail; | 1412 | goto fail; |
| 1413 | } | ||
| 1409 | } | 1414 | } |
| 1410 | 1415 | ||
| 1411 | p += rq_size; | 1416 | p += rq_size; |
| @@ -1416,7 +1421,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
| 1416 | return tags; | 1421 | return tags; |
| 1417 | 1422 | ||
| 1418 | fail: | 1423 | fail: |
| 1419 | pr_warn("%s: failed to allocate requests\n", __func__); | ||
| 1420 | blk_mq_free_rq_map(set, tags, hctx_idx); | 1424 | blk_mq_free_rq_map(set, tags, hctx_idx); |
| 1421 | return NULL; | 1425 | return NULL; |
| 1422 | } | 1426 | } |
| @@ -1936,6 +1940,61 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
| 1936 | return NOTIFY_OK; | 1940 | return NOTIFY_OK; |
| 1937 | } | 1941 | } |
| 1938 | 1942 | ||
| 1943 | static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) | ||
| 1944 | { | ||
| 1945 | int i; | ||
| 1946 | |||
| 1947 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
| 1948 | set->tags[i] = blk_mq_init_rq_map(set, i); | ||
| 1949 | if (!set->tags[i]) | ||
| 1950 | goto out_unwind; | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | return 0; | ||
| 1954 | |||
| 1955 | out_unwind: | ||
| 1956 | while (--i >= 0) | ||
| 1957 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
| 1958 | |||
| 1959 | set->tags = NULL; | ||
| 1960 | return -ENOMEM; | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | /* | ||
| 1964 | * Allocate the request maps associated with this tag_set. Note that this | ||
| 1965 | * may reduce the depth asked for, if memory is tight. set->queue_depth | ||
| 1966 | * will be updated to reflect the allocated depth. | ||
| 1967 | */ | ||
| 1968 | static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) | ||
| 1969 | { | ||
| 1970 | unsigned int depth; | ||
| 1971 | int err; | ||
| 1972 | |||
| 1973 | depth = set->queue_depth; | ||
| 1974 | do { | ||
| 1975 | err = __blk_mq_alloc_rq_maps(set); | ||
| 1976 | if (!err) | ||
| 1977 | break; | ||
| 1978 | |||
| 1979 | set->queue_depth >>= 1; | ||
| 1980 | if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { | ||
| 1981 | err = -ENOMEM; | ||
| 1982 | break; | ||
| 1983 | } | ||
| 1984 | } while (set->queue_depth); | ||
| 1985 | |||
| 1986 | if (!set->queue_depth || err) { | ||
| 1987 | pr_err("blk-mq: failed to allocate request map\n"); | ||
| 1988 | return -ENOMEM; | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | if (depth != set->queue_depth) | ||
| 1992 | pr_info("blk-mq: reduced tag depth (%u -> %u)\n", | ||
| 1993 | depth, set->queue_depth); | ||
| 1994 | |||
| 1995 | return 0; | ||
| 1996 | } | ||
| 1997 | |||
| 1939 | /* | 1998 | /* |
| 1940 | * Alloc a tag set to be associated with one or more request queues. | 1999 | * Alloc a tag set to be associated with one or more request queues. |
| 1941 | * May fail with EINVAL for various error conditions. May adjust the | 2000 | * May fail with EINVAL for various error conditions. May adjust the |
| @@ -1944,8 +2003,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
| 1944 | */ | 2003 | */ |
| 1945 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) | 2004 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) |
| 1946 | { | 2005 | { |
| 1947 | int i; | ||
| 1948 | |||
| 1949 | if (!set->nr_hw_queues) | 2006 | if (!set->nr_hw_queues) |
| 1950 | return -EINVAL; | 2007 | return -EINVAL; |
| 1951 | if (!set->queue_depth) | 2008 | if (!set->queue_depth) |
| @@ -1966,25 +2023,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) | |||
| 1966 | sizeof(struct blk_mq_tags *), | 2023 | sizeof(struct blk_mq_tags *), |
| 1967 | GFP_KERNEL, set->numa_node); | 2024 | GFP_KERNEL, set->numa_node); |
| 1968 | if (!set->tags) | 2025 | if (!set->tags) |
| 1969 | goto out; | 2026 | return -ENOMEM; |
| 1970 | 2027 | ||
| 1971 | for (i = 0; i < set->nr_hw_queues; i++) { | 2028 | if (blk_mq_alloc_rq_maps(set)) |
| 1972 | set->tags[i] = blk_mq_init_rq_map(set, i); | 2029 | goto enomem; |
| 1973 | if (!set->tags[i]) | ||
| 1974 | goto out_unwind; | ||
| 1975 | } | ||
| 1976 | 2030 | ||
| 1977 | mutex_init(&set->tag_list_lock); | 2031 | mutex_init(&set->tag_list_lock); |
| 1978 | INIT_LIST_HEAD(&set->tag_list); | 2032 | INIT_LIST_HEAD(&set->tag_list); |
| 1979 | 2033 | ||
| 1980 | return 0; | 2034 | return 0; |
| 1981 | 2035 | enomem: | |
| 1982 | out_unwind: | ||
| 1983 | while (--i >= 0) | ||
| 1984 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
| 1985 | kfree(set->tags); | 2036 | kfree(set->tags); |
| 1986 | set->tags = NULL; | 2037 | set->tags = NULL; |
| 1987 | out: | ||
| 1988 | return -ENOMEM; | 2038 | return -ENOMEM; |
| 1989 | } | 2039 | } |
| 1990 | EXPORT_SYMBOL(blk_mq_alloc_tag_set); | 2040 | EXPORT_SYMBOL(blk_mq_alloc_tag_set); |
