author     Shaohua Li <shli@fb.com>   2015-01-23 16:18:00 -0500
committer  Jens Axboe <axboe@fb.com>  2015-01-23 16:18:00 -0500
commit     24391c0dc57c3756a219defaa781e68637d6ab7d
tree       4d0e5b213c017e622d69d916c9e13e7f6e805368
parent     ee1b6f7aff94019c09e73837054979063f722046
blk-mq: add tag allocation policy
This is the blk-mq part of the support for tag allocation policies. The
default allocation policy is unchanged (though it is not a strict FIFO).
The new policy, round-robin, is for libata. It is a best-effort
implementation: if multiple tasks are competing, the returned tags will
be mixed, which is unavoidable even without blk-mq, as requests from
different tasks can be interleaved in the queue.

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
 block/blk-mq-tag.c      | 39
 block/blk-mq-tag.h      |  4
 block/blk-mq.c          |  3
 drivers/scsi/scsi_lib.c |  2
 include/linux/blk-mq.h  |  8
 5 files changed, 39 insertions(+), 17 deletions(-)
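The behavioural difference between the two policies is easiest to see in
miniature. Below is a self-contained userspace sketch, not the kernel
code: it models a single bitmap word of tags, with cursor standing in for
ctx->last_tag and rr for BLK_TAG_ALLOC_RR; the names toy_tags, toy_get,
toy_put and claim_from are invented for illustration. The FIFO-style
default re-seeds its cursor from freed tags (so a freed tag is reused
immediately), while round-robin always advances the cursor past the tag
it just handed out, issuing tags in circular order.

/*
 * Toy model (userspace, not the kernel implementation) of one bitmap
 * word of tags.  'rr' mimics BLK_TAG_ALLOC_RR; 'cursor' plays the role
 * of the per-context last_tag cache.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEPTH 8u				/* tags in this toy map */

struct toy_tags {
	unsigned int word;			/* bit set == tag busy */
	unsigned int cursor;			/* where the next search starts */
	bool rr;				/* round-robin policy? */
};

/* Claim the first free bit at or after 'start'; -1 if none. */
static int claim_from(struct toy_tags *t, unsigned int start)
{
	unsigned int b;

	for (b = start; b < DEPTH; b++) {
		if (!(t->word & (1u << b))) {
			t->word |= 1u << b;
			return (int)b;
		}
	}
	return -1;
}

static int toy_get(struct toy_tags *t)
{
	int tag = claim_from(t, t->cursor);

	if (tag < 0)		/* exhausted from cursor: rescan from 0
				 * (single-word stand-in for the kernel's
				 * wrap-within-word vs. next-word logic) */
		tag = claim_from(t, 0);
	/* FIFO advances only when the cached tag was used; RR always does. */
	if (tag >= 0 && ((unsigned int)tag == t->cursor || t->rr))
		t->cursor = ((unsigned int)tag + 1) % DEPTH;
	return tag;
}

static void toy_put(struct toy_tags *t, unsigned int tag)
{
	t->word &= ~(1u << tag);
	if (!t->rr)		/* FIFO caches the freed tag for reuse */
		t->cursor = tag;
}

int main(void)
{
	struct toy_tags fifo = { 0, 0, false }, rr = { 0, 0, true };
	int i;

	for (i = 0; i < 4; i++) {
		int a = toy_get(&fifo), b = toy_get(&rr);

		toy_put(&fifo, (unsigned int)a);
		toy_put(&rr, (unsigned int)b);
		printf("FIFO hands out %d, RR hands out %d\n", a, b);
	}
	return 0;
}

Compiled and run, the FIFO side hands out tag 0 four times while the RR
side hands out 0, 1, 2, 3. This is the "best-effort" round-robin the
commit message describes: concurrent allocators all advance the shared
cursor, so their tags interleave.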
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index d4daee385a23..e3387a74a9a2 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -140,7 +140,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return atomic_read(&hctx->nr_active) < depth;
 }
 
-static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
+			 bool nowrap)
 {
 	int tag, org_last_tag = last_tag;
 
@@ -152,7 +153,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 		 * offset to 0 in a failure case, so start from 0 to
 		 * exhaust the map.
 		 */
-		if (org_last_tag && last_tag) {
+		if (org_last_tag && last_tag && !nowrap) {
 			last_tag = org_last_tag = 0;
 			continue;
 		}
@@ -170,6 +171,8 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 	return tag;
 }
 
+#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
+
 /*
  * Straight forward bitmap tag implementation, where each bit is a tag
  * (cleared == free, and set == busy). The small twist is using per-cpu
@@ -182,7 +185,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
  * until the map is exhausted.
  */
 static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
-		    unsigned int *tag_cache)
+		    unsigned int *tag_cache, struct blk_mq_tags *tags)
 {
 	unsigned int last_tag, org_last_tag;
 	int index, i, tag;
@@ -194,7 +197,8 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	index = TAG_TO_INDEX(bt, last_tag);
 
 	for (i = 0; i < bt->map_nr; i++) {
-		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
+		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
+				    BT_ALLOC_RR(tags));
 		if (tag != -1) {
 			tag += (index << bt->bits_per_word);
 			goto done;
@@ -221,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	 * up using the specific cached tag.
 	 */
 done:
-	if (tag == org_last_tag) {
+	if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
 		last_tag = tag + 1;
 		if (last_tag >= bt->depth - 1)
 			last_tag = 0;
@@ -250,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 static int bt_get(struct blk_mq_alloc_data *data,
 		  struct blk_mq_bitmap_tags *bt,
 		  struct blk_mq_hw_ctx *hctx,
-		  unsigned int *last_tag)
+		  unsigned int *last_tag, struct blk_mq_tags *tags)
 {
 	struct bt_wait_state *bs;
 	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = __bt_get(hctx, bt, last_tag);
+	tag = __bt_get(hctx, bt, last_tag, tags);
 	if (tag != -1)
 		return tag;
 
@@ -267,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	do {
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(hctx, bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag, tags);
 		if (tag != -1)
 			break;
 
@@ -282,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 		 * Retry tag allocation after running the hardware queue,
 		 * as running the queue may also have found completions.
 		 */
-		tag = __bt_get(hctx, bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag, tags);
 		if (tag != -1)
 			break;
 
@@ -313,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	int tag;
 
 	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
-			&data->ctx->last_tag);
+			&data->ctx->last_tag, data->hctx->tags);
 	if (tag >= 0)
 		return tag + data->hctx->tags->nr_reserved_tags;
 
@@ -329,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
+	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
+		data->hctx->tags);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
 
@@ -401,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 
 		BUG_ON(real_tag >= tags->nr_tags);
 		bt_clear_tag(&tags->bitmap_tags, real_tag);
-		*last_tag = real_tag;
+		if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
+			*last_tag = real_tag;
 	} else {
 		BUG_ON(tag >= tags->nr_reserved_tags);
 		bt_clear_tag(&tags->breserved_tags, tag);
@@ -538,10 +544,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
 }
 
 static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
						   int node)
-						   int node)
+						   int node, int alloc_policy)
 {
 	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
 
+	tags->alloc_policy = alloc_policy;
+
 	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
 		goto enomem;
 	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
@@ -555,7 +563,8 @@ enomem:
 }
 
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
-				     unsigned int reserved_tags, int node)
+				     unsigned int reserved_tags,
+				     int node, int alloc_policy)
 {
 	struct blk_mq_tags *tags;
 
@@ -571,7 +580,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
 
-	return blk_mq_init_bitmap_tags(tags, node);
+	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
 }
 
 void blk_mq_free_tags(struct blk_mq_tags *tags)
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index a6fa0fc9d41a..90767b370308 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,10 +42,12 @@ struct blk_mq_tags {
 
 	struct request **rqs;
 	struct list_head page_list;
+
+	int alloc_policy;
 };
 
 
-extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7d4a988516f..eb8e694fda06 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1374,7 +1374,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	size_t rq_size, left;
 
 	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
-				set->numa_node);
+				set->numa_node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9ea95dd3e260..49ab11508286 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
 	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.flags |=
+		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;
 
 	return blk_mq_alloc_tag_set(&shost->tag_set);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5b6500c77ed2..86b08b1a5eba 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -147,6 +147,8 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_SYSFS_UP	= 1 << 3,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
@@ -155,6 +157,12 @@ enum {
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
+#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
+	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
+		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
+#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
+	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
+		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 void blk_mq_finish_init(struct request_queue *q);
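
For reference, the two new macros simply park the policy value in the
flag bits starting at BLK_MQ_F_ALLOC_POLICY_START_BIT, clear of the
existing BLK_MQ_F_* bits. Below is a small userspace round-trip check:
the macro bodies are copied from the hunk above, while BLK_TAG_ALLOC_FIFO
and BLK_TAG_ALLOC_RR are assumed to be the 0/1 enum values introduced by
the parent commit (ee1b6f7aff94), and the (1u << 0) flag is only a
stand-in for an unrelated BLK_MQ_F_* bit.

#include <assert.h>

#define BLK_MQ_F_ALLOC_POLICY_START_BIT 8
#define BLK_MQ_F_ALLOC_POLICY_BITS	1

#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

enum { BLK_TAG_ALLOC_FIFO, BLK_TAG_ALLOC_RR };	/* per parent commit */

int main(void)
{
	/* Mimic the scsi_lib.c hunk: OR the policy into existing flags. */
	unsigned int flags = (1u << 0)
		| BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_RR);

	/* blk_mq_init_rq_map() recovers the policy; low bits untouched. */
	assert(BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) == BLK_TAG_ALLOC_RR);
	assert(flags & (1u << 0));
	return 0;
}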