-rw-r--r--  block/blk-mq-sysfs.c       |  10
-rw-r--r--  block/blk-mq-tag.c         | 112
-rw-r--r--  block/blk-mq-tag.h         |  27
-rw-r--r--  block/blk-mq.c             |  85
-rw-r--r--  block/blk-timeout.c        |  13
-rw-r--r--  block/blk.h                |   4
-rw-r--r--  include/linux/blk-mq.h     |   7
-rw-r--r--  include/linux/blk_types.h  |   2
-rw-r--r--  include/linux/blkdev.h     |   3
9 files changed, 236 insertions, 27 deletions
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 8145b5b25b4b..99a60a829e69 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -208,6 +208,11 @@ static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
         return blk_mq_tag_sysfs_show(hctx->tags, page);
 }
 
+static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+        return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
+}
+
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
         unsigned int i, first = 1;
@@ -267,6 +272,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
         .attr = {.name = "dispatched", .mode = S_IRUGO },
         .show = blk_mq_hw_sysfs_dispatched_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
+        .attr = {.name = "active", .mode = S_IRUGO },
+        .show = blk_mq_hw_sysfs_active_show,
+};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
         .attr = {.name = "pending", .mode = S_IRUGO },
         .show = blk_mq_hw_sysfs_rq_list_show,
@@ -287,6 +296,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
         &blk_mq_hw_sysfs_pending.attr,
         &blk_mq_hw_sysfs_tags.attr,
         &blk_mq_hw_sysfs_cpus.attr,
+        &blk_mq_hw_sysfs_active.attr,
         NULL,
 };
 
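Note: the new per-hctx counter is exported read-only alongside the existing mq sysfs attributes, so the instantaneous nr_active value can be sampled from userspace. A minimal sketch of reading it follows; the device name and queue number are placeholders, and the path assumes the /sys/block/<dev>/mq/<n>/ layout used by the other attributes in this file.

#include <stdio.h>

int main(void)
{
        /* Hypothetical device: a null_blk instance; adjust "nullb0" and the
         * hardware queue number "0" for the system being inspected. */
        const char *path = "/sys/block/nullb0/mq/0/active";
        char buf[32];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("hctx 0 nr_active: %s", buf);
        fclose(f);
        return 0;
}
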
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 8d526a3e02f6..c80086c9c064 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -7,13 +7,12 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx,
-                          bool reserved)
+void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved)
 {
         int tag, zero = 0;
 
-        tag = blk_mq_get_tag(tags, hctx, &zero, __GFP_WAIT, reserved);
-        blk_mq_put_tag(tags, tag, &zero);
+        tag = blk_mq_get_tag(hctx, &zero, __GFP_WAIT, reserved);
+        blk_mq_put_tag(hctx, tag, &zero);
 }
 
 static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
@@ -40,6 +39,84 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
         return bt_has_free_tags(&tags->bitmap_tags);
 }
 
+static inline void bt_index_inc(unsigned int *index)
+{
+        *index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+/*
+ * If a previously inactive queue goes active, bump the active user count.
+ */
+bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+            !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+                atomic_inc(&hctx->tags->active_queues);
+
+        return true;
+}
+
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+        struct blk_mq_tags *tags = hctx->tags;
+        struct blk_mq_bitmap_tags *bt;
+        int i, wake_index;
+
+        if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+                return;
+
+        atomic_dec(&tags->active_queues);
+
+        /*
+         * Will only throttle depth on non-reserved tags
+         */
+        bt = &tags->bitmap_tags;
+        wake_index = bt->wake_index;
+        for (i = 0; i < BT_WAIT_QUEUES; i++) {
+                struct bt_wait_state *bs = &bt->bs[wake_index];
+
+                if (waitqueue_active(&bs->wait))
+                        wake_up(&bs->wait);
+
+                bt_index_inc(&wake_index);
+        }
+}
+
+/*
+ * For shared tag users, we track the number of currently active users
+ * and attempt to provide a fair share of the tag depth for each of them.
+ */
+static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
+                                  struct blk_mq_bitmap_tags *bt)
+{
+        unsigned int depth, users;
+
+        if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
+                return true;
+        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+                return true;
+
+        /*
+         * Don't try dividing an ant
+         */
+        if (bt->depth == 1)
+                return true;
+
+        users = atomic_read(&hctx->tags->active_queues);
+        if (!users)
+                return true;
+
+        /*
+         * Allow at least some tags
+         */
+        depth = max((bt->depth + users - 1) / users, 4U);
+        return atomic_read(&hctx->nr_active) < depth;
+}
+
 static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag)
 {
         int tag, org_last_tag, end;
@@ -78,11 +155,15 @@ restart:
  * multiple users will tend to stick to different cachelines, at least
  * until the map is exhausted.
  */
-static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
+static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
+                    unsigned int *tag_cache)
 {
         unsigned int last_tag, org_last_tag;
         int index, i, tag;
 
+        if (!hctx_may_queue(hctx, bt))
+                return -1;
+
         last_tag = org_last_tag = *tag_cache;
         index = TAG_TO_INDEX(bt, last_tag);
 
@@ -117,11 +198,6 @@ done:
         return tag;
 }
 
-static inline void bt_index_inc(unsigned int *index)
-{
-        *index = (*index + 1) & (BT_WAIT_QUEUES - 1);
-}
-
 static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
                                          struct blk_mq_hw_ctx *hctx)
 {
@@ -142,7 +218,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
         DEFINE_WAIT(wait);
         int tag;
 
-        tag = __bt_get(bt, last_tag);
+        tag = __bt_get(hctx, bt, last_tag);
         if (tag != -1)
                 return tag;
 
@@ -156,7 +232,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
                 was_empty = list_empty(&wait.task_list);
                 prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-                tag = __bt_get(bt, last_tag);
+                tag = __bt_get(hctx, bt, last_tag);
                 if (tag != -1)
                         break;
 
@@ -200,14 +276,13 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
         return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_tags *tags,
-                            struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
+unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
                             gfp_t gfp, bool reserved)
 {
         if (!reserved)
-                return __blk_mq_get_tag(tags, hctx, last_tag, gfp);
+                return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);
 
-        return __blk_mq_get_reserved_tag(tags, gfp);
+        return __blk_mq_get_reserved_tag(hctx->tags, gfp);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
@@ -265,9 +340,11 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
         bt_clear_tag(&tags->breserved_tags, tag);
 }
 
-void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag,
+void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
                     unsigned int *last_tag)
 {
+        struct blk_mq_tags *tags = hctx->tags;
+
         if (tag >= tags->nr_reserved_tags) {
                 const int real_tag = tag - tags->nr_reserved_tags;
 
@@ -465,6 +542,7 @@ ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
         res = bt_unused_tags(&tags->breserved_tags);
 
         page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
+        page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
 
         return page - orig_page;
 }
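Note on the fairness cap added above: hctx_may_queue() limits each active shared-tag user to roughly ceil(depth / users) tags, but never fewer than four. A minimal userspace sketch of that arithmetic (hypothetical values, not kernel code; a 128-tag map shared by a growing number of active queues) shows the per-queue limit it produces:

#include <stdio.h>

/* Sketch of the cap computed in hctx_may_queue(): ceiling of depth/users,
 * clamped so every active queue may still use at least 4 tags. */
static unsigned int fair_share(unsigned int depth, unsigned int users)
{
        unsigned int d = (depth + users - 1) / users;   /* ceil(depth / users) */

        return d < 4 ? 4 : d;                           /* "allow at least some tags" */
}

int main(void)
{
        unsigned int users;

        for (users = 1; users <= 32; users *= 2)
                printf("depth=128 users=%2u -> per-queue cap=%u\n",
                       users, fair_share(128, users));
        return 0;
}

With 32 active users of a 128-tag map the cap bottoms out at 4, so a deep shared map can feed many queues without letting any single one starve the rest.
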
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 7aa9f0665489..0f5ec8b50ef3 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -38,6 +38,8 @@ struct blk_mq_tags {
         unsigned int nr_tags;
         unsigned int nr_reserved_tags;
 
+        atomic_t active_queues;
+
         struct blk_mq_bitmap_tags bitmap_tags;
         struct blk_mq_bitmap_tags breserved_tags;
 
@@ -49,9 +51,9 @@ struct blk_mq_tags {
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
-extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
-extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, bool reserved);
-extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag, unsigned int *last_tag);
+extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
+extern void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved);
+extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
 extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
@@ -68,4 +70,23 @@ enum {
         BLK_MQ_TAG_MAX          = BLK_MQ_TAG_FAIL - 1,
 };
 
+extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
+extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
+
+static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+        if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+                return false;
+
+        return __blk_mq_tag_busy(hctx);
+}
+
+static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+        if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+                return;
+
+        __blk_mq_tag_idle(hctx);
+}
+
 #endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9f07a266f7ab..3c4f1fceef8e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -80,9 +80,16 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
         struct request *rq;
         unsigned int tag;
 
-        tag = blk_mq_get_tag(hctx->tags, hctx, &ctx->last_tag, gfp, reserved);
+        tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
         if (tag != BLK_MQ_TAG_FAIL) {
                 rq = hctx->tags->rqs[tag];
+
+                rq->cmd_flags = 0;
+                if (blk_mq_tag_busy(hctx)) {
+                        rq->cmd_flags = REQ_MQ_INFLIGHT;
+                        atomic_inc(&hctx->nr_active);
+                }
+
                 rq->tag = tag;
                 return rq;
         }
@@ -190,7 +197,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
         /* csd/requeue_work/fifo_time is initialized before use */
         rq->q = q;
         rq->mq_ctx = ctx;
-        rq->cmd_flags = rw_flags;
+        rq->cmd_flags |= rw_flags;
         rq->cmd_type = 0;
         /* do not touch atomic flags, it needs atomic ops against the timer */
         rq->cpu = -1;
@@ -262,7 +269,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
                         break;
                 }
 
-                blk_mq_wait_for_tags(hctx->tags, hctx, reserved);
+                blk_mq_wait_for_tags(hctx, reserved);
         } while (1);
 
         return rq;
@@ -303,8 +310,11 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
         const int tag = rq->tag;
         struct request_queue *q = rq->q;
 
+        if (rq->cmd_flags & REQ_MQ_INFLIGHT)
+                atomic_dec(&hctx->nr_active);
+
         clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-        blk_mq_put_tag(hctx->tags, tag, &ctx->last_tag);
+        blk_mq_put_tag(hctx, tag, &ctx->last_tag);
         blk_mq_queue_exit(q);
 }
 
@@ -571,8 +581,13 @@ static void blk_mq_rq_timer(unsigned long data)
         queue_for_each_hw_ctx(q, hctx, i)
                 blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
 
-        if (next_set)
-                mod_timer(&q->timeout, round_jiffies_up(next));
+        if (next_set) {
+                next = blk_rq_timeout(round_jiffies_up(next));
+                mod_timer(&q->timeout, next);
+        } else {
+                queue_for_each_hw_ctx(q, hctx, i)
+                        blk_mq_tag_idle(hctx);
+        }
 }
 
 /*
@@ -1439,6 +1454,56 @@ static void blk_mq_map_swqueue(struct request_queue *q)
         }
 }
 
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
+{
+        struct blk_mq_hw_ctx *hctx;
+        struct request_queue *q;
+        bool shared;
+        int i;
+
+        if (set->tag_list.next == set->tag_list.prev)
+                shared = false;
+        else
+                shared = true;
+
+        list_for_each_entry(q, &set->tag_list, tag_set_list) {
+                blk_mq_freeze_queue(q);
+
+                queue_for_each_hw_ctx(q, hctx, i) {
+                        if (shared)
+                                hctx->flags |= BLK_MQ_F_TAG_SHARED;
+                        else
+                                hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+                }
+                blk_mq_unfreeze_queue(q);
+        }
+}
+
+static void blk_mq_del_queue_tag_set(struct request_queue *q)
+{
+        struct blk_mq_tag_set *set = q->tag_set;
+
+        blk_mq_freeze_queue(q);
+
+        mutex_lock(&set->tag_list_lock);
+        list_del_init(&q->tag_set_list);
+        blk_mq_update_tag_set_depth(set);
+        mutex_unlock(&set->tag_list_lock);
+
+        blk_mq_unfreeze_queue(q);
+}
+
+static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
+                                     struct request_queue *q)
+{
+        q->tag_set = set;
+
+        mutex_lock(&set->tag_list_lock);
+        list_add_tail(&q->tag_set_list, &set->tag_list);
+        blk_mq_update_tag_set_depth(set);
+        mutex_unlock(&set->tag_list_lock);
+}
+
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
         struct blk_mq_hw_ctx **hctxs;
@@ -1464,6 +1529,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
                 if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
                         goto err_hctxs;
 
+                atomic_set(&hctxs[i]->nr_active, 0);
                 hctxs[i]->numa_node = NUMA_NO_NODE;
                 hctxs[i]->queue_num = i;
         }
@@ -1516,6 +1582,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
         list_add_tail(&q->all_q_node, &all_q_list);
         mutex_unlock(&all_q_mutex);
 
+        blk_mq_add_queue_tag_set(set, q);
+
         return q;
 
 err_flush_rq:
@@ -1543,6 +1611,8 @@ void blk_mq_free_queue(struct request_queue *q)
         struct blk_mq_hw_ctx *hctx;
         int i;
 
+        blk_mq_del_queue_tag_set(q);
+
         queue_for_each_hw_ctx(q, hctx, i) {
                 kfree(hctx->ctx_map);
                 kfree(hctx->ctxs);
@@ -1635,6 +1705,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                 goto out_unwind;
         }
 
+        mutex_init(&set->tag_list_lock);
+        INIT_LIST_HEAD(&set->tag_list);
+
         return 0;
 
 out_unwind:
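The tag_set bookkeeping added above decides whether a map is shared purely from its tag_list: with a circular list, next == prev only while zero or one queue is registered, so adding a second queue flips every hctx to BLK_MQ_F_TAG_SHARED and removing one flips them back. A small userspace sketch of just that test (a toy list type with hypothetical names, not the kernel's list_head implementation):

#include <stdio.h>
#include <stdbool.h>

/* Toy circular doubly linked list, only to illustrate the "shared" check
 * in blk_mq_update_tag_set_depth(). */
struct node { struct node *next, *prev; };

static void list_init(struct node *h) { h->next = h->prev = h; }

static void list_add_tail(struct node *n, struct node *h)
{
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
}

/* next == prev means the list holds zero or one entry, i.e. not shared. */
static bool is_shared(const struct node *h) { return h->next != h->prev; }

int main(void)
{
        struct node tag_list, q1, q2;

        list_init(&tag_list);
        printf("0 queues: shared=%d\n", is_shared(&tag_list)); /* 0 */
        list_add_tail(&q1, &tag_list);
        printf("1 queue:  shared=%d\n", is_shared(&tag_list)); /* 0 */
        list_add_tail(&q2, &tag_list);
        printf("2 queues: shared=%d\n", is_shared(&tag_list)); /* 1 */
        return 0;
}
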
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 448745683d28..43e8b515806f 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -166,6 +166,17 @@ void blk_abort_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_abort_request);
 
+unsigned long blk_rq_timeout(unsigned long timeout)
+{
+        unsigned long maxt;
+
+        maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
+        if (time_after(timeout, maxt))
+                timeout = maxt;
+
+        return timeout;
+}
+
 /**
  * blk_add_timer - Start timeout timer for a single request
  * @req: request that is about to start running.
@@ -200,7 +211,7 @@ void blk_add_timer(struct request *req)
          * than an existing one, modify the timer. Round up to next nearest
          * second.
          */
-        expiry = round_jiffies_up(req->deadline);
+        expiry = blk_rq_timeout(round_jiffies_up(req->deadline));
 
         if (!timer_pending(&q->timeout) ||
             time_before(expiry, q->timeout.expires)) {
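blk_rq_timeout() above simply clamps a requested expiry so the queue timer is never armed more than BLK_MAX_TIMEOUT (5 seconds here) past the current jiffies value, which keeps the timer firing often enough for the new idle path in blk_mq_rq_timer() to get a chance to run. A rough userspace sketch of the clamp, with plain integers standing in for jiffies and the round_jiffies_up() rounding and wraparound handling omitted:

#include <stdio.h>

#define HZ              1000UL          /* hypothetical tick rate */
#define BLK_MAX_TIMEOUT (5 * HZ)

/* Rough equivalent of blk_rq_timeout(): never push the timer more than
 * BLK_MAX_TIMEOUT past "now". */
static unsigned long clamp_timeout(unsigned long now, unsigned long timeout)
{
        unsigned long maxt = now + BLK_MAX_TIMEOUT;

        return timeout > maxt ? maxt : timeout;
}

int main(void)
{
        unsigned long now = 100000;

        /* A 30 second deadline is pulled in to now + 5s; 2 seconds is kept. */
        printf("%lu\n", clamp_timeout(now, now + 30 * HZ));    /* 105000 */
        printf("%lu\n", clamp_timeout(now, now + 2 * HZ));     /* 102000 */
        return 0;
}
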
diff --git a/block/blk.h b/block/blk.h
index 79be2cbce7fd..95cab70000e3 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -9,6 +9,9 @@
 /* Number of requests a "batching" process may submit */
 #define BLK_BATCH_REQ   32
 
+/* Max future timer expiry for timeouts */
+#define BLK_MAX_TIMEOUT (5 * HZ)
+
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
@@ -37,6 +40,7 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
                           unsigned int *next_set);
+unsigned long blk_rq_timeout(unsigned long timeout);
 void blk_add_timer(struct request *req);
 void blk_delete_timer(struct request *);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f83d15f6e1c1..379f88d5c44d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -48,6 +48,8 @@ struct blk_mq_hw_ctx {
         unsigned int            numa_node;
         unsigned int            cmd_size;       /* per-request extra data */
 
+        atomic_t                nr_active;
+
         struct blk_mq_cpu_notifier      cpu_notifier;
         struct kobject          kobj;
 };
@@ -64,6 +66,9 @@ struct blk_mq_tag_set {
         void                    *driver_data;
 
         struct blk_mq_tags      **tags;
+
+        struct mutex            tag_list_lock;
+        struct list_head        tag_list;
 };
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
@@ -126,8 +131,10 @@ enum {
 
         BLK_MQ_F_SHOULD_MERGE   = 1 << 0,
         BLK_MQ_F_SHOULD_SORT    = 1 << 1,
+        BLK_MQ_F_TAG_SHARED     = 1 << 2,
 
         BLK_MQ_S_STOPPED        = 0,
+        BLK_MQ_S_TAG_ACTIVE     = 1,
 
         BLK_MQ_MAX_DEPTH        = 2048,
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index aa0eaa2d0bd8..d8e4cea23a25 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -190,6 +190,7 @@ enum rq_flag_bits {
         __REQ_PM,               /* runtime pm request */
         __REQ_END,              /* last of chain of requests */
         __REQ_HASHED,           /* on IO scheduler merge hash */
+        __REQ_MQ_INFLIGHT,      /* track inflight for MQ */
         __REQ_NR_BITS,          /* stops here */
 };
 
@@ -243,5 +244,6 @@ enum rq_flag_bits {
 #define REQ_PM                  (1ULL << __REQ_PM)
 #define REQ_END                 (1ULL << __REQ_END)
 #define REQ_HASHED              (1ULL << __REQ_HASHED)
+#define REQ_MQ_INFLIGHT         (1ULL << __REQ_MQ_INFLIGHT)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94b27210641b..6bc011a09e82 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -481,6 +481,9 @@ struct request_queue {
         wait_queue_head_t       mq_freeze_wq;
         struct percpu_counter   mq_usage_counter;
         struct list_head        all_q_node;
+
+        struct blk_mq_tag_set   *tag_set;
+        struct list_head        tag_set_list;
 };
 
 #define QUEUE_FLAG_QUEUED       1       /* uses generic tag queueing */