author      Shaohua Li <shli@fb.com>        2015-01-23 16:18:00 -0500
committer   Jens Axboe <axboe@fb.com>       2015-01-23 16:18:00 -0500
commit      24391c0dc57c3756a219defaa781e68637d6ab7d (patch)
tree        4d0e5b213c017e622d69d916c9e13e7f6e805368
parent      ee1b6f7aff94019c09e73837054979063f722046 (diff)
blk-mq: add tag allocation policy
This is the blk-mq part of the tag allocation policy support. The default
allocation policy is unchanged (though it is not a strict FIFO). The new
policy is round-robin, for libata. It is a best-effort implementation: if
multiple tasks are competing, the tags returned will be mixed, which is
unavoidable even with !mq, as requests from different tasks can be
interleaved in the queue.
Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
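
To make the difference concrete, here is a minimal user-space model of the
two behaviors. This is a simplified sketch, not the kernel code: the
BLK_TAG_ALLOC_FIFO/BLK_TAG_ALLOC_RR names come from the companion
block-layer patch, and the real allocator works per bitmap word with
per-cpu caches. The default policy re-offers the most recently freed tag,
which favors cache-hot tags; round-robin always advances past the last
allocated tag, which spreads reuse evenly across the tag space.

	/* Toy model of cached-FIFO vs round-robin tag allocation (sketch only). */
	#include <stdio.h>

	enum { TAG_ALLOC_FIFO, TAG_ALLOC_RR }; /* models BLK_TAG_ALLOC_* */
	#define DEPTH 8

	static unsigned long busy;  /* bit i set means tag i is in use */
	static unsigned int cursor; /* models the per-ctx last_tag cache */

	static int get_tag(int policy)
	{
		for (unsigned int n = 0; n < DEPTH; n++) {
			unsigned int t = (cursor + n) % DEPTH;
			if (!(busy & (1UL << t))) {
				busy |= 1UL << t;
				if (policy == TAG_ALLOC_RR)
					cursor = (t + 1) % DEPTH; /* always advance */
				return t;
			}
		}
		return -1; /* map exhausted */
	}

	static void put_tag(int policy, int t)
	{
		busy &= ~(1UL << t);
		if (policy == TAG_ALLOC_FIFO)
			cursor = t; /* models "*last_tag = real_tag" on free */
	}

	int main(void)
	{
		for (int p = TAG_ALLOC_FIFO; p <= TAG_ALLOC_RR; p++) {
			busy = 0;
			cursor = 0;
			printf("policy %d:", p);
			for (int i = 0; i < 6; i++) {
				int t = get_tag(p);
				printf(" %d", t);
				put_tag(p, t); /* free at once to expose the reuse pattern */
			}
			printf("\n"); /* FIFO: 0 0 0 0 0 0; RR: 0 1 2 3 4 5 */
		}
		return 0;
	}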
-rw-r--r--  block/blk-mq-tag.c       | 39
-rw-r--r--  block/blk-mq-tag.h       |  4
-rw-r--r--  block/blk-mq.c           |  3
-rw-r--r--  drivers/scsi/scsi_lib.c  |  2
-rw-r--r--  include/linux/blk-mq.h   |  8
5 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index d4daee385a23..e3387a74a9a2 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -140,7 +140,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return atomic_read(&hctx->nr_active) < depth;
 }
 
-static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
+			 bool nowrap)
 {
 	int tag, org_last_tag = last_tag;
 
@@ -152,7 +153,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 			 * offset to 0 in a failure case, so start from 0 to
 			 * exhaust the map.
 			 */
-			if (org_last_tag && last_tag) {
+			if (org_last_tag && last_tag && !nowrap) {
 				last_tag = org_last_tag = 0;
 				continue;
 			}
@@ -170,6 +171,8 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 	return tag;
 }
 
+#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
+
 /*
  * Straight forward bitmap tag implementation, where each bit is a tag
  * (cleared == free, and set == busy). The small twist is using per-cpu
@@ -182,7 +185,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
  * until the map is exhausted.
  */
 static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
-		    unsigned int *tag_cache)
+		    unsigned int *tag_cache, struct blk_mq_tags *tags)
 {
 	unsigned int last_tag, org_last_tag;
 	int index, i, tag;
@@ -194,7 +197,8 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	index = TAG_TO_INDEX(bt, last_tag);
 
 	for (i = 0; i < bt->map_nr; i++) {
-		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
+		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
+				    BT_ALLOC_RR(tags));
 		if (tag != -1) {
 			tag += (index << bt->bits_per_word);
 			goto done;
@@ -221,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	 * up using the specific cached tag.
 	 */
 done:
-	if (tag == org_last_tag) {
+	if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
 		last_tag = tag + 1;
 		if (last_tag >= bt->depth - 1)
 			last_tag = 0;
@@ -250,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 static int bt_get(struct blk_mq_alloc_data *data,
 		  struct blk_mq_bitmap_tags *bt,
 		  struct blk_mq_hw_ctx *hctx,
-		  unsigned int *last_tag)
+		  unsigned int *last_tag, struct blk_mq_tags *tags)
 {
 	struct bt_wait_state *bs;
 	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = __bt_get(hctx, bt, last_tag);
+	tag = __bt_get(hctx, bt, last_tag, tags);
 	if (tag != -1)
 		return tag;
 
@@ -267,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	do {
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(hctx, bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag, tags);
 		if (tag != -1)
 			break;
 
@@ -282,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 		 * Retry tag allocation after running the hardware queue,
 		 * as running the queue may also have found completions.
 		 */
-		tag = __bt_get(hctx, bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag, tags);
 		if (tag != -1)
 			break;
 
@@ -313,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	int tag;
 
 	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
-			&data->ctx->last_tag);
+			&data->ctx->last_tag, data->hctx->tags);
 	if (tag >= 0)
 		return tag + data->hctx->tags->nr_reserved_tags;
 
@@ -329,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
+	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
+		data->hctx->tags);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
 
@@ -401,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 
 		BUG_ON(real_tag >= tags->nr_tags);
 		bt_clear_tag(&tags->bitmap_tags, real_tag);
-		*last_tag = real_tag;
+		if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
+			*last_tag = real_tag;
 	} else {
 		BUG_ON(tag >= tags->nr_reserved_tags);
 		bt_clear_tag(&tags->breserved_tags, tag);
@@ -538,10 +544,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
 }
 
 static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
-						   int node)
+						   int node, int alloc_policy)
 {
 	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
 
+	tags->alloc_policy = alloc_policy;
+
 	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
 		goto enomem;
 	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
@@ -555,7 +563,8 @@ enomem:
 }
 
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
-				     unsigned int reserved_tags, int node)
+				     unsigned int reserved_tags,
+				     int node, int alloc_policy)
 {
 	struct blk_mq_tags *tags;
 
@@ -571,7 +580,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
 
-	return blk_mq_init_bitmap_tags(tags, node);
+	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
 }
 
 void blk_mq_free_tags(struct blk_mq_tags *tags)
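
A user-space rendering of the wrap-around change above may help: with
nowrap set (round-robin), a search that starts past the last free bit in a
word fails instead of wrapping back to bit 0, so the outer loop in
__bt_get moves on to the next bitmap word and allocation keeps walking
forward. This sketch substitutes a plain loop for find_next_zero_bit() and
a non-atomic set for test_and_set_bit(); everything else follows the hunk.

	#include <stdbool.h>
	#include <stdio.h>

	#define WORD_DEPTH 8

	/* stand-in for find_next_zero_bit(): first clear bit in [from, depth) */
	static int find_zero(unsigned long word, int depth, int from)
	{
		for (int i = from; i < depth; i++)
			if (!(word & (1UL << i)))
				return i;
		return depth;
	}

	/* mirrors __bt_get_word(): search from last_tag, optionally wrap once */
	static int bt_get_word(unsigned long *word, int last_tag, bool nowrap)
	{
		int org_last_tag = last_tag;
		int tag;

		while (1) {
			tag = find_zero(*word, WORD_DEPTH, last_tag);
			if (tag >= WORD_DEPTH) {
				/* wrap back to bit 0 once, unless round-robin */
				if (org_last_tag && last_tag && !nowrap) {
					last_tag = org_last_tag = 0;
					continue;
				}
				return -1; /* caller tries the next bitmap word */
			}
			*word |= 1UL << tag; /* non-atomic test_and_set_bit() */
			return tag;
		}
	}

	int main(void)
	{
		unsigned long word = 0xCF; /* bits 0-3 and 6-7 busy, 4-5 free */

		/* round-robin: searching from bit 6 finds nothing and gives up */
		unsigned long w1 = word;
		printf("%d\n", bt_get_word(&w1, 6, true));  /* -1 */

		/* default: the same search wraps to bit 0 and finds bit 4 */
		unsigned long w2 = word;
		printf("%d\n", bt_get_word(&w2, 6, false)); /* 4 */
		return 0;
	}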
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index a6fa0fc9d41a..90767b370308 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,10 +42,12 @@ struct blk_mq_tags {
 
 	struct request **rqs;
 	struct list_head page_list;
+
+	int alloc_policy;
 };
 
 
-extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7d4a988516f..eb8e694fda06 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1374,7 +1374,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	size_t rq_size, left;
 
 	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
-				set->numa_node);
+				set->numa_node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9ea95dd3e260..49ab11508286 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
 	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.flags |=
+		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;
 
 	return blk_mq_alloc_tag_set(&shost->tag_set);
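
With this hook in place, a low-level SCSI driver opts in simply by setting
tag_alloc_policy in its host template; the libata conversion lands in a
separate patch. A hypothetical template for illustration (the example_
names are made up, most fields are elided, and BLK_TAG_ALLOC_RR comes from
the companion block-layer patch):

	/* Hypothetical host template requesting round-robin tag allocation. */
	static struct scsi_host_template example_sht = {
		.module			= THIS_MODULE,
		.name			= "example",
		.can_queue		= 31,
		.this_id		= -1,
		/* ... queuecommand and the other usual ops elided ... */
		.tag_alloc_policy	= BLK_TAG_ALLOC_RR, /* default is FIFO (0) */
	};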
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5b6500c77ed2..86b08b1a5eba 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -147,6 +147,8 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_SYSFS_UP	= 1 << 3,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
@@ -155,6 +157,12 @@ enum {
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
+#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
+	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
+		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
+#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
+	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
+		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 void blk_mq_finish_init(struct request_queue *q);
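
The two macros pack the policy into bits [8, 8 + BLK_MQ_F_ALLOC_POLICY_BITS)
of the tag-set flags, so it rides along with the existing BLK_MQ_F_* bits.
With one policy bit, only policies 0 and 1 (FIFO and round-robin) fit, and
a driver that sets no policy bits gets 0, the default. A quick user-space
round-trip check, with the macros copied verbatim from the hunk above:

	#include <assert.h>
	#include <stdio.h>

	#define BLK_MQ_F_ALLOC_POLICY_START_BIT 8
	#define BLK_MQ_F_ALLOC_POLICY_BITS 1

	#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
		((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
			((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
	#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
		((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
			<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

	int main(void)
	{
		for (int policy = 0; policy <= 1; policy++) {
			unsigned int flags = (1 << 0) | (1 << 2); /* unrelated flag bits */
			flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(policy);
			assert(BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) == policy);
			printf("policy %d -> flags 0x%x\n", policy, flags);
		}
		return 0;
	}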