author		Christoph Hellwig <hch@lst.de>	2014-04-15 16:14:00 -0400
committer	Jens Axboe <axboe@fb.com>	2014-04-15 16:18:02 -0400
commit		24d2f90309b23f2cfe016b2aebc5f0d6e01c57fd
tree		10307829129eb8f96facbb89fefbba3c0032fb46 /block
parent		ed44832dea8a74f909e0187f350201402927f5e5
blk-mq: split out tag initialization, support shared tags
Add a new blk_mq_tag_set structure that gets set up before we initialize
the queue.  A single blk_mq_tag_set structure can be shared by multiple
queues.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Modular export of blk_mq_{alloc,free}_tagset added by me.

Signed-off-by: Jens Axboe <axboe@fb.com>
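For orientation before the per-file changes, here is a hedged usage sketch (not part of
the patch) of how a driver is expected to drive the new interface: populate a
blk_mq_tag_set, allocate the shared tags and pre-allocated requests with
blk_mq_alloc_tag_set(), then create one or more queues from the same set with
blk_mq_init_queue().  The my_mq_ops, my_dev and my_cmd names are hypothetical
placeholders.

	/* Hedged sketch; my_mq_ops, my_dev and my_cmd are hypothetical. */
	static struct blk_mq_tag_set my_tag_set;

	static int my_driver_init_queue(struct my_dev *dev)
	{
		struct request_queue *q;
		int ret;

		my_tag_set.ops		= &my_mq_ops;	/* queue_rq, map_queue, alloc_hctx, free_hctx, ... */
		my_tag_set.nr_hw_queues	= 1;
		my_tag_set.queue_depth	= 64;
		my_tag_set.reserved_tags = 0;
		my_tag_set.cmd_size	= sizeof(struct my_cmd);
		my_tag_set.numa_node	= NUMA_NO_NODE;
		my_tag_set.driver_data	= dev;

		/* Allocates set->tags[i] (tags plus requests) for every hw queue. */
		ret = blk_mq_alloc_tag_set(&my_tag_set);
		if (ret)
			return ret;

		/*
		 * A queue created from the set reuses set->tags[]; several
		 * queues may share the same tag set.
		 */
		q = blk_mq_init_queue(&my_tag_set);
		if (IS_ERR(q)) {
			blk_mq_free_tag_set(&my_tag_set);
			return PTR_ERR(q);
		}

		dev->queue = q;
		return 0;
	}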
Diffstat (limited to 'block')
-rw-r--r--	block/blk-mq-cpumap.c	6
-rw-r--r--	block/blk-mq-tag.c	14
-rw-r--r--	block/blk-mq-tag.h	19
-rw-r--r--	block/blk-mq.c		244
-rw-r--r--	block/blk-mq.h		5
5 files changed, 160 insertions(+), 128 deletions(-)
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 097921329619..5d0f93cf358c 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -80,17 +80,17 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
 	return 0;
 }
 
-unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg)
+unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 {
 	unsigned int *map;
 
 	/* If cpus are offline, map them to first hctx */
 	map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
-				reg->numa_node);
+				set->numa_node);
 	if (!map)
 		return NULL;
 
-	if (!blk_mq_update_queue_map(map, reg->nr_hw_queues))
+	if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
 		return map;
 
 	kfree(map);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 83ae96c51a27..7a799c46c32d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -1,25 +1,11 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/percpu_ida.h>
 
 #include <linux/blk-mq.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-/*
- * Per tagged queue (tag address space) map
- */
-struct blk_mq_tags {
-	unsigned int nr_tags;
-	unsigned int nr_reserved_tags;
-	unsigned int nr_batch_move;
-	unsigned int nr_max_cache;
-
-	struct percpu_ida free_tags;
-	struct percpu_ida reserved_tags;
-};
-
 void blk_mq_wait_for_tags(struct blk_mq_tags *tags)
 {
 	int tag = blk_mq_get_tag(tags, __GFP_WAIT, false);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 947ba2c6148e..b602e3fa66ea 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -1,7 +1,24 @@
 #ifndef INT_BLK_MQ_TAG_H
 #define INT_BLK_MQ_TAG_H
 
-struct blk_mq_tags;
+#include <linux/percpu_ida.h>
+
+/*
+ * Tag address space map.
+ */
+struct blk_mq_tags {
+	unsigned int nr_tags;
+	unsigned int nr_reserved_tags;
+	unsigned int nr_batch_move;
+	unsigned int nr_max_cache;
+
+	struct percpu_ida free_tags;
+	struct percpu_ida reserved_tags;
+
+	struct request **rqs;
+	struct list_head page_list;
+};
+
 
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2a5a0fed10a3..9180052d42cc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -81,7 +81,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
 
 	tag = blk_mq_get_tag(hctx->tags, gfp, reserved);
 	if (tag != BLK_MQ_TAG_FAIL) {
-		rq = hctx->rqs[tag];
+		rq = hctx->tags->rqs[tag];
 		blk_rq_init(hctx->queue, rq);
 		rq->tag = tag;
 
@@ -404,6 +404,12 @@ static void blk_mq_requeue_request(struct request *rq)
 		rq->nr_phys_segments--;
 }
 
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+{
+	return tags->rqs[tag];
+}
+EXPORT_SYMBOL(blk_mq_tag_to_rq);
+
 struct blk_mq_timeout_data {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long *next;
@@ -425,12 +431,13 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
 	do {
 		struct request *rq;
 
-		tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag);
-		if (tag >= hctx->queue_depth)
+		tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag);
+		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = hctx->rqs[tag++];
-
+		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
+		if (rq->q != hctx->queue)
+			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
 			continue;
 
@@ -969,11 +976,11 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
 }
 EXPORT_SYMBOL(blk_mq_map_queue);
 
-struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg,
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set,
 						   unsigned int hctx_index)
 {
 	return kmalloc_node(sizeof(struct blk_mq_hw_ctx),
-				GFP_KERNEL | __GFP_ZERO, reg->numa_node);
+				GFP_KERNEL | __GFP_ZERO, set->numa_node);
 }
 EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
 
@@ -1030,31 +1037,31 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	blk_mq_put_ctx(ctx);
 }
 
-static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx, void *driver_data)
+static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
+		struct blk_mq_tags *tags, unsigned int hctx_idx)
 {
 	struct page *page;
 
-	if (hctx->rqs && hctx->queue->mq_ops->exit_request) {
+	if (tags->rqs && set->ops->exit_request) {
 		int i;
 
-		for (i = 0; i < hctx->queue_depth; i++) {
-			if (!hctx->rqs[i])
+		for (i = 0; i < tags->nr_tags; i++) {
+			if (!tags->rqs[i])
 				continue;
-			hctx->queue->mq_ops->exit_request(driver_data, hctx,
-					hctx->rqs[i], i);
+			set->ops->exit_request(set->driver_data, tags->rqs[i],
+						hctx_idx, i);
 		}
 	}
 
-	while (!list_empty(&hctx->page_list)) {
-		page = list_first_entry(&hctx->page_list, struct page, lru);
+	while (!list_empty(&tags->page_list)) {
+		page = list_first_entry(&tags->page_list, struct page, lru);
 		list_del_init(&page->lru);
 		__free_pages(page, page->private);
 	}
 
-	kfree(hctx->rqs);
+	kfree(tags->rqs);
 
-	if (hctx->tags)
-		blk_mq_free_tags(hctx->tags);
+	blk_mq_free_tags(tags);
 }
 
 static size_t order_to_size(unsigned int order)
@@ -1067,30 +1074,36 @@ static size_t order_to_size(unsigned int order)
 	return ret;
 }
 
-static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
-		struct blk_mq_reg *reg, void *driver_data, int node)
+static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
+		unsigned int hctx_idx)
 {
-	unsigned int reserved_tags = reg->reserved_tags;
+	struct blk_mq_tags *tags;
 	unsigned int i, j, entries_per_page, max_order = 4;
 	size_t rq_size, left;
-	int error;
 
-	INIT_LIST_HEAD(&hctx->page_list);
+	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
+				set->numa_node);
+	if (!tags)
+		return NULL;
 
-	hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *),
-					GFP_KERNEL, node);
-	if (!hctx->rqs)
-		return -ENOMEM;
+	INIT_LIST_HEAD(&tags->page_list);
+
+	tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
+					GFP_KERNEL, set->numa_node);
+	if (!tags->rqs) {
+		blk_mq_free_tags(tags);
+		return NULL;
+	}
 
 	/*
 	 * rq_size is the size of the request plus driver payload, rounded
 	 * to the cacheline size
 	 */
-	rq_size = round_up(sizeof(struct request) + hctx->cmd_size,
+	rq_size = round_up(sizeof(struct request) + set->cmd_size,
 				cache_line_size());
-	left = rq_size * hctx->queue_depth;
+	left = rq_size * set->queue_depth;
 
-	for (i = 0; i < hctx->queue_depth;) {
+	for (i = 0; i < set->queue_depth; ) {
 		int this_order = max_order;
 		struct page *page;
 		int to_do;
@@ -1100,7 +1113,8 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 			this_order--;
 
 		do {
-			page = alloc_pages_node(node, GFP_KERNEL, this_order);
+			page = alloc_pages_node(set->numa_node, GFP_KERNEL,
+						this_order);
 			if (page)
 				break;
 			if (!this_order--)
@@ -1110,22 +1124,22 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 		} while (1);
 
 		if (!page)
-			break;
+			goto fail;
 
 		page->private = this_order;
-		list_add_tail(&page->lru, &hctx->page_list);
+		list_add_tail(&page->lru, &tags->page_list);
 
 		p = page_address(page);
 		entries_per_page = order_to_size(this_order) / rq_size;
-		to_do = min(entries_per_page, hctx->queue_depth - i);
+		to_do = min(entries_per_page, set->queue_depth - i);
 		left -= to_do * rq_size;
 		for (j = 0; j < to_do; j++) {
-			hctx->rqs[i] = p;
-			if (reg->ops->init_request) {
-				error = reg->ops->init_request(driver_data,
-						hctx, hctx->rqs[i], i);
-				if (error)
-					goto err_rq_map;
+			tags->rqs[i] = p;
+			if (set->ops->init_request) {
+				if (set->ops->init_request(set->driver_data,
+						tags->rqs[i], hctx_idx, i,
+						set->numa_node))
+					goto fail;
 			}
 
 			p += rq_size;
@@ -1133,30 +1147,16 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	if (i < (reserved_tags + BLK_MQ_TAG_MIN)) {
-		error = -ENOMEM;
-		goto err_rq_map;
-	}
-	if (i != hctx->queue_depth) {
-		hctx->queue_depth = i;
-		pr_warn("%s: queue depth set to %u because of low memory\n",
-					__func__, i);
-	}
+	return tags;
 
-	hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node);
-	if (!hctx->tags) {
-		error = -ENOMEM;
-		goto err_rq_map;
-	}
-
-	return 0;
-err_rq_map:
-	blk_mq_free_rq_map(hctx, driver_data);
-	return error;
+fail:
+	pr_warn("%s: failed to allocate requests\n", __func__);
+	blk_mq_free_rq_map(set, tags, hctx_idx);
+	return NULL;
 }
 
 static int blk_mq_init_hw_queues(struct request_queue *q,
-		struct blk_mq_reg *reg, void *driver_data)
+		struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i, j;
@@ -1170,23 +1170,21 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 
 		node = hctx->numa_node;
 		if (node == NUMA_NO_NODE)
-			node = hctx->numa_node = reg->numa_node;
+			node = hctx->numa_node = set->numa_node;
 
 		INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn);
 		spin_lock_init(&hctx->lock);
 		INIT_LIST_HEAD(&hctx->dispatch);
 		hctx->queue = q;
 		hctx->queue_num = i;
-		hctx->flags = reg->flags;
-		hctx->queue_depth = reg->queue_depth;
-		hctx->cmd_size = reg->cmd_size;
+		hctx->flags = set->flags;
+		hctx->cmd_size = set->cmd_size;
 
 		blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 						blk_mq_hctx_notify, hctx);
 		blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
 
-		if (blk_mq_init_rq_map(hctx, reg, driver_data, node))
-			break;
+		hctx->tags = set->tags[i];
 
 		/*
 		 * Allocate space for all possible cpus to avoid allocation in
@@ -1206,8 +1204,8 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		hctx->nr_ctx_map = num_maps;
 		hctx->nr_ctx = 0;
 
-		if (reg->ops->init_hctx &&
-		    reg->ops->init_hctx(hctx, driver_data, i))
+		if (set->ops->init_hctx &&
+		    set->ops->init_hctx(hctx, set->driver_data, i))
 			break;
 	}
 
@@ -1221,11 +1219,10 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		if (i == j)
 			break;
 
-		if (reg->ops->exit_hctx)
-			reg->ops->exit_hctx(hctx, j);
+		if (set->ops->exit_hctx)
+			set->ops->exit_hctx(hctx, j);
 
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
-		blk_mq_free_rq_map(hctx, driver_data);
 		kfree(hctx->ctxs);
 	}
 
@@ -1290,41 +1287,25 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 }
 
-struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
-		void *driver_data)
+struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx *ctx;
 	struct request_queue *q;
 	int i;
 
-	if (!reg->nr_hw_queues ||
-	    !reg->ops->queue_rq || !reg->ops->map_queue ||
-	    !reg->ops->alloc_hctx || !reg->ops->free_hctx)
-		return ERR_PTR(-EINVAL);
-
-	if (!reg->queue_depth)
-		reg->queue_depth = BLK_MQ_MAX_DEPTH;
-	else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) {
-		pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth);
-		reg->queue_depth = BLK_MQ_MAX_DEPTH;
-	}
-
-	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
-		return ERR_PTR(-EINVAL);
-
 	ctx = alloc_percpu(struct blk_mq_ctx);
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
-	hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
-			reg->numa_node);
+	hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
+			set->numa_node);
 
 	if (!hctxs)
 		goto err_percpu;
 
-	for (i = 0; i < reg->nr_hw_queues; i++) {
-		hctxs[i] = reg->ops->alloc_hctx(reg, i);
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		hctxs[i] = set->ops->alloc_hctx(set, i);
 		if (!hctxs[i])
 			goto err_hctxs;
 
@@ -1335,11 +1316,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		hctxs[i]->queue_num = i;
 	}
 
-	q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node);
+	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
 	if (!q)
 		goto err_hctxs;
 
-	q->mq_map = blk_mq_make_queue_map(reg);
+	q->mq_map = blk_mq_make_queue_map(set);
 	if (!q->mq_map)
 		goto err_map;
 
@@ -1347,33 +1328,34 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 	blk_queue_rq_timeout(q, 30000);
 
 	q->nr_queues = nr_cpu_ids;
-	q->nr_hw_queues = reg->nr_hw_queues;
+	q->nr_hw_queues = set->nr_hw_queues;
 
 	q->queue_ctx = ctx;
 	q->queue_hw_ctx = hctxs;
 
-	q->mq_ops = reg->ops;
+	q->mq_ops = set->ops;
 	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
 	q->sg_reserved_size = INT_MAX;
 
 	blk_queue_make_request(q, blk_mq_make_request);
-	blk_queue_rq_timed_out(q, reg->ops->timeout);
-	if (reg->timeout)
-		blk_queue_rq_timeout(q, reg->timeout);
+	blk_queue_rq_timed_out(q, set->ops->timeout);
+	if (set->timeout)
+		blk_queue_rq_timeout(q, set->timeout);
 
-	if (reg->ops->complete)
-		blk_queue_softirq_done(q, reg->ops->complete);
+	if (set->ops->complete)
+		blk_queue_softirq_done(q, set->ops->complete);
 
 	blk_mq_init_flush(q);
-	blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
+	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
-			cache_line_size()), GFP_KERNEL);
+	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
+				set->cmd_size, cache_line_size()),
+				GFP_KERNEL);
 	if (!q->flush_rq)
 		goto err_hw;
 
-	if (blk_mq_init_hw_queues(q, reg, driver_data))
+	if (blk_mq_init_hw_queues(q, set))
 		goto err_flush_rq;
 
 	blk_mq_map_swqueue(q);
@@ -1391,11 +1373,11 @@ err_hw:
 err_map:
 	blk_cleanup_queue(q);
 err_hctxs:
-	for (i = 0; i < reg->nr_hw_queues; i++) {
+	for (i = 0; i < set->nr_hw_queues; i++) {
 		if (!hctxs[i])
 			break;
 		free_cpumask_var(hctxs[i]->cpumask);
-		reg->ops->free_hctx(hctxs[i], i);
+		set->ops->free_hctx(hctxs[i], i);
 	}
 	kfree(hctxs);
 err_percpu:
@@ -1412,7 +1394,6 @@ void blk_mq_free_queue(struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		kfree(hctx->ctx_map);
 		kfree(hctx->ctxs);
-		blk_mq_free_rq_map(hctx, q->queuedata);
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		if (q->mq_ops->exit_hctx)
 			q->mq_ops->exit_hctx(hctx, i);
@@ -1473,6 +1454,53 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+{
+	int i;
+
+	if (!set->nr_hw_queues)
+		return -EINVAL;
+	if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+		return -EINVAL;
+	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
+		return -EINVAL;
+
+	if (!set->nr_hw_queues ||
+	    !set->ops->queue_rq || !set->ops->map_queue ||
+	    !set->ops->alloc_hctx || !set->ops->free_hctx)
+		return -EINVAL;
+
+
+	set->tags = kmalloc_node(set->nr_hw_queues * sizeof(struct blk_mq_tags),
+				 GFP_KERNEL, set->numa_node);
+	if (!set->tags)
+		goto out;
+
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		set->tags[i] = blk_mq_init_rq_map(set, i);
+		if (!set->tags[i])
+			goto out_unwind;
+	}
+
+	return 0;
+
+out_unwind:
+	while (--i >= 0)
+		blk_mq_free_rq_map(set, set->tags[i], i);
+out:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(blk_mq_alloc_tag_set);
+
+void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
+{
+	int i;
+
+	for (i = 0; i < set->nr_hw_queues; i++)
+		blk_mq_free_rq_map(set, set->tags[i], i);
+}
+EXPORT_SYMBOL(blk_mq_free_tag_set);
+
 void blk_mq_disable_hotplug(void)
 {
 	mutex_lock(&all_q_mutex);
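The blk-mq.c hunks above also change the per-request constructor/destructor call sites:
init_request() and exit_request() now receive the tag set's driver_data, the request and
the hardware-queue index instead of a hctx pointer, and init_request() additionally gets
the NUMA node.  A hedged sketch of driver-side callbacks matching the new call sites
follows; the prototypes are inferred from the calls in this diff (the actual typedefs
live in include/linux/blk-mq.h, which is outside this 'block'-limited diffstat), my_dev
and my_cmd are hypothetical, and blk_mq_rq_to_pdu() is assumed available to reach the
cmd_size payload behind the request.

	static int my_init_request(void *data, struct request *rq,
				   unsigned int hctx_idx, unsigned int rq_idx,
				   unsigned int numa_node)
	{
		struct my_dev *dev = data;			/* set->driver_data */
		struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);	/* per-request cmd_size area */

		cmd->dev = dev;
		return 0;	/* non-zero makes blk_mq_init_rq_map() fail */
	}

	static void my_exit_request(void *data, struct request *rq,
				    unsigned int hctx_idx, unsigned int rq_idx)
	{
		/* undo whatever my_init_request() set up */
	}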
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 7964dadb7d64..5fa14f19f752 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -1,6 +1,8 @@
 #ifndef INT_BLK_MQ_H
 #define INT_BLK_MQ_H
 
+struct blk_mq_tag_set;
+
 struct blk_mq_ctx {
 	struct {
 		spinlock_t lock;
@@ -46,8 +48,7 @@ void blk_mq_disable_hotplug(void);
 /*
  * CPU -> queue mappings
  */
-struct blk_mq_reg;
-extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg);
+extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
 extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
 
 void blk_mq_add_timer(struct request *rq);