author		Jens Axboe <axboe@fb.com>	2014-05-19 13:52:35 -0400
committer	Jens Axboe <axboe@fb.com>	2014-05-19 13:52:35 -0400
commit		39a9f97e5ea99e048c4980c23cf197f6e77995cb (patch)
tree		b1f72ed1e852372b6d86b79157b09f77fabc5a20
parent		1429d7c9467e1e3de0b0ff91d7e4d67c1a92f8a3 (diff)
parent		0d2602ca30e410e84e8bdf05c84ed5688e0a5a44 (diff)
Merge branch 'for-3.16/blk-mq-tagging' into for-3.16/core
Signed-off-by: Jens Axboe <axboe@fb.com>

Conflicts:
	block/blk-mq-tag.c
-rw-r--r--	block/blk-mq-sysfs.c		|  10
-rw-r--r--	block/blk-mq-tag.c		| 112
-rw-r--r--	block/blk-mq-tag.h		|  27
-rw-r--r--	block/blk-mq.c			|  85
-rw-r--r--	block/blk-timeout.c		|  13
-rw-r--r--	block/blk.h			|   4
-rw-r--r--	include/linux/blk-mq.h		|   7
-rw-r--r--	include/linux/blk_types.h	|   2
-rw-r--r--	include/linux/blkdev.h		|   3
9 files changed, 236 insertions(+), 27 deletions(-)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 8145b5b25b4b..99a60a829e69 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -208,6 +208,11 @@ static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
 	return blk_mq_tag_sysfs_show(hctx->tags, page);
 }
 
+static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
+}
+
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
 	unsigned int i, first = 1;
@@ -267,6 +272,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
 	.attr = {.name = "dispatched", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_dispatched_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
+	.attr = {.name = "active", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_active_show,
+};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
 	.attr = {.name = "pending", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_rq_list_show,
@@ -287,6 +296,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_pending.attr,
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
+	&blk_mq_hw_sysfs_active.attr,
 	NULL,
 };
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 03ce6a11ba79..e6b3fbae9862 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -7,13 +7,12 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx,
-			  bool reserved)
+void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved)
 {
 	int tag, zero = 0;
 
-	tag = blk_mq_get_tag(tags, hctx, &zero, __GFP_WAIT, reserved);
-	blk_mq_put_tag(tags, tag, &zero);
+	tag = blk_mq_get_tag(hctx, &zero, __GFP_WAIT, reserved);
+	blk_mq_put_tag(hctx, tag, &zero);
 }
 
 static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
@@ -40,6 +39,84 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 	return bt_has_free_tags(&tags->bitmap_tags);
 }
 
+static inline void bt_index_inc(unsigned int *index)
+{
+	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+/*
+ * If a previously inactive queue goes active, bump the active user count.
+ */
+bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		atomic_inc(&hctx->tags->active_queues);
+
+	return true;
+}
+
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+	struct blk_mq_bitmap_tags *bt;
+	int i, wake_index;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	/*
+	 * Will only throttle depth on non-reserved tags
+	 */
+	bt = &tags->bitmap_tags;
+	wake_index = bt->wake_index;
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
+		struct bt_wait_state *bs = &bt->bs[wake_index];
+
+		if (waitqueue_active(&bs->wait))
+			wake_up(&bs->wait);
+
+		bt_index_inc(&wake_index);
+	}
+}
+
+/*
+ * For shared tag users, we track the number of currently active users
+ * and attempt to provide a fair share of the tag depth for each of them.
+ */
+static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
+				  struct blk_mq_bitmap_tags *bt)
+{
+	unsigned int depth, users;
+
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return true;
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return true;
+
+	/*
+	 * Don't try dividing an ant
+	 */
+	if (bt->depth == 1)
+		return true;
+
+	users = atomic_read(&hctx->tags->active_queues);
+	if (!users)
+		return true;
+
+	/*
+	 * Allow at least some tags
+	 */
+	depth = max((bt->depth + users - 1) / users, 4U);
+	return atomic_read(&hctx->nr_active) < depth;
+}
+
 static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 {
 	int tag, org_last_tag, end;
@@ -78,11 +155,15 @@ restart:
  * multiple users will tend to stick to different cachelines, at least
  * until the map is exhausted.
  */
-static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
+static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
+		    unsigned int *tag_cache)
 {
 	unsigned int last_tag, org_last_tag;
 	int index, i, tag;
 
+	if (!hctx_may_queue(hctx, bt))
+		return -1;
+
 	last_tag = org_last_tag = *tag_cache;
 	index = TAG_TO_INDEX(bt, last_tag);
 
@@ -117,11 +198,6 @@ done:
 	return tag;
 }
 
-static inline void bt_index_inc(unsigned int *index)
-{
-	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
-}
-
 static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 					 struct blk_mq_hw_ctx *hctx)
 {
@@ -142,7 +218,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
 	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = __bt_get(bt, last_tag);
+	tag = __bt_get(hctx, bt, last_tag);
 	if (tag != -1)
 		return tag;
 
@@ -156,7 +232,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
 		was_empty = list_empty(&wait.task_list);
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag);
 		if (tag != -1)
 			break;
 
@@ -200,14 +276,13 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
 	return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_tags *tags,
-			    struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
+unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
 			    gfp_t gfp, bool reserved)
 {
 	if (!reserved)
-		return __blk_mq_get_tag(tags, hctx, last_tag, gfp);
+		return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);
 
-	return __blk_mq_get_reserved_tag(tags, gfp);
+	return __blk_mq_get_reserved_tag(hctx->tags, gfp);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
@@ -265,9 +340,11 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
 	bt_clear_tag(&tags->breserved_tags, tag);
 }
 
-void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag,
+void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 		    unsigned int *last_tag)
 {
+	struct blk_mq_tags *tags = hctx->tags;
+
 	if (tag >= tags->nr_reserved_tags) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
@@ -465,6 +542,7 @@ ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 	res = bt_unused_tags(&tags->breserved_tags);
 
 	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
+	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
 
 	return page - orig_page;
 }
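The hctx_may_queue() helper added above caps each active queue at roughly its fair share of the shared tag depth: ceil(depth / active_queues), but never fewer than 4 tags. A minimal standalone sketch of that calculation (plain C, names local to this example, not part of the patch):

#include <stdio.h>

/* mirrors: depth = max((bt->depth + users - 1) / users, 4U) */
static unsigned int fair_share_depth(unsigned int depth, unsigned int users)
{
	unsigned int share;

	if (depth == 1 || users == 0)
		return depth;			/* nothing to divide */

	share = (depth + users - 1) / users;	/* round up */
	return share < 4 ? 4 : share;		/* allow at least some tags */
}

int main(void)
{
	printf("%u\n", fair_share_depth(128, 3));	/* 43: ceil(128/3) */
	printf("%u\n", fair_share_depth(128, 64));	/* 4: floor kicks in */
	return 0;
}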
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 9014269f3910..e144f68ec45f 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -35,6 +35,8 @@ struct blk_mq_tags {
 	unsigned int nr_tags;
 	unsigned int nr_reserved_tags;
 
+	atomic_t active_queues;
+
 	struct blk_mq_bitmap_tags bitmap_tags;
 	struct blk_mq_bitmap_tags breserved_tags;
 
@@ -46,9 +48,9 @@ struct blk_mq_tags {
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
-extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
-extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, bool reserved);
-extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag, unsigned int *last_tag);
+extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
+extern void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved);
+extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
 extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
@@ -65,4 +67,23 @@ enum {
 	BLK_MQ_TAG_MAX		= BLK_MQ_TAG_FAIL - 1,
 };
 
+extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
+extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
+
+static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return false;
+
+	return __blk_mq_tag_busy(hctx);
+}
+
+static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return;
+
+	__blk_mq_tag_idle(hctx);
+}
+
 #endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e862c4408427..0fbef7e9bef1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -99,9 +99,16 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(hctx->tags, hctx, &ctx->last_tag, gfp, reserved);
+	tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
 	if (tag != BLK_MQ_TAG_FAIL) {
 		rq = hctx->tags->rqs[tag];
+
+		rq->cmd_flags = 0;
+		if (blk_mq_tag_busy(hctx)) {
+			rq->cmd_flags = REQ_MQ_INFLIGHT;
+			atomic_inc(&hctx->nr_active);
+		}
+
 		rq->tag = tag;
 		return rq;
 	}
@@ -209,7 +216,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	/* csd/requeue_work/fifo_time is initialized before use */
 	rq->q = q;
 	rq->mq_ctx = ctx;
-	rq->cmd_flags = rw_flags;
+	rq->cmd_flags |= rw_flags;
 	rq->cmd_type = 0;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
 	rq->cpu = -1;
@@ -281,7 +288,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 			break;
 		}
 
-		blk_mq_wait_for_tags(hctx->tags, hctx, reserved);
+		blk_mq_wait_for_tags(hctx, reserved);
 	} while (1);
 
 	return rq;
@@ -322,8 +329,11 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	const int tag = rq->tag;
 	struct request_queue *q = rq->q;
 
+	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
+		atomic_dec(&hctx->nr_active);
+
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-	blk_mq_put_tag(hctx->tags, tag, &ctx->last_tag);
+	blk_mq_put_tag(hctx, tag, &ctx->last_tag);
 	blk_mq_queue_exit(q);
 }
 
@@ -590,8 +600,13 @@ static void blk_mq_rq_timer(unsigned long data)
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
 
-	if (next_set)
-		mod_timer(&q->timeout, round_jiffies_up(next));
+	if (next_set) {
+		next = blk_rq_timeout(round_jiffies_up(next));
+		mod_timer(&q->timeout, next);
+	} else {
+		queue_for_each_hw_ctx(q, hctx, i)
+			blk_mq_tag_idle(hctx);
+	}
 }
 
 /*
@@ -1501,6 +1516,56 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 }
 
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q;
+	bool shared;
+	int i;
+
+	if (set->tag_list.next == set->tag_list.prev)
+		shared = false;
+	else
+		shared = true;
+
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		blk_mq_freeze_queue(q);
+
+		queue_for_each_hw_ctx(q, hctx, i) {
+			if (shared)
+				hctx->flags |= BLK_MQ_F_TAG_SHARED;
+			else
+				hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
+		blk_mq_unfreeze_queue(q);
+	}
+}
+
+static void blk_mq_del_queue_tag_set(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+
+	blk_mq_freeze_queue(q);
+
+	mutex_lock(&set->tag_list_lock);
+	list_del_init(&q->tag_set_list);
+	blk_mq_update_tag_set_depth(set);
+	mutex_unlock(&set->tag_list_lock);
+
+	blk_mq_unfreeze_queue(q);
+}
+
+static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
+				     struct request_queue *q)
+{
+	q->tag_set = set;
+
+	mutex_lock(&set->tag_list_lock);
+	list_add_tail(&q->tag_set_list, &set->tag_list);
+	blk_mq_update_tag_set_depth(set);
+	mutex_unlock(&set->tag_list_lock);
+}
+
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx **hctxs;
@@ -1526,6 +1591,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
 			goto err_hctxs;
 
+		atomic_set(&hctxs[i]->nr_active, 0);
 		hctxs[i]->numa_node = NUMA_NO_NODE;
 		hctxs[i]->queue_num = i;
 	}
@@ -1578,6 +1644,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	list_add_tail(&q->all_q_node, &all_q_list);
 	mutex_unlock(&all_q_mutex);
 
+	blk_mq_add_queue_tag_set(set, q);
+
 	return q;
 
 err_flush_rq:
@@ -1605,6 +1673,8 @@ void blk_mq_free_queue(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	blk_mq_del_queue_tag_set(q);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		kfree(hctx->ctxs);
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
@@ -1696,6 +1766,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 			goto out_unwind;
 	}
 
+	mutex_init(&set->tag_list_lock);
+	INIT_LIST_HEAD(&set->tag_list);
+
 	return 0;
 
 out_unwind:
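One detail worth noting in blk_mq_update_tag_set_depth() above: with a circular list head, tag_list.next == tag_list.prev holds for both an empty and a single-entry list, so a tag set only becomes "shared" once a second queue is added to it. A small standalone sketch of that check (plain C; the list type and helpers below are stand-ins for this example, not the kernel ones):

#include <stdbool.h>
#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

static void list_add_tail_sketch(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

/* empty or single-entry list: next == prev, so not shared */
static bool tag_set_is_shared(const struct list_head *tag_list)
{
	return tag_list->next != tag_list->prev;
}

int main(void)
{
	struct list_head tag_list = { &tag_list, &tag_list };
	struct list_head q1, q2;

	printf("%d\n", tag_set_is_shared(&tag_list));	/* 0: no queues */
	list_add_tail_sketch(&q1, &tag_list);
	printf("%d\n", tag_set_is_shared(&tag_list));	/* 0: one queue */
	list_add_tail_sketch(&q2, &tag_list);
	printf("%d\n", tag_set_is_shared(&tag_list));	/* 1: shared */
	return 0;
}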
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 448745683d28..43e8b515806f 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -166,6 +166,17 @@ void blk_abort_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_abort_request);
 
+unsigned long blk_rq_timeout(unsigned long timeout)
+{
+	unsigned long maxt;
+
+	maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
+	if (time_after(timeout, maxt))
+		timeout = maxt;
+
+	return timeout;
+}
+
 /**
  * blk_add_timer - Start timeout timer for a single request
  * @req:	request that is about to start running.
@@ -200,7 +211,7 @@ void blk_add_timer(struct request *req)
 	 * than an existing one, modify the timer. Round up to next nearest
 	 * second.
 	 */
-	expiry = round_jiffies_up(req->deadline);
+	expiry = blk_rq_timeout(round_jiffies_up(req->deadline));
 
 	if (!timer_pending(&q->timeout) ||
 	    time_before(expiry, q->timeout.expires)) {
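For context, the new blk_rq_timeout() helper clamps a requested expiry so the queue timer fires within BLK_MAX_TIMEOUT (5 seconds) of now; among other things this bounds how long an idle shared-tag hctx waits before blk_mq_rq_timer() gets a chance to mark it idle. A rough standalone sketch of the clamp behaviour (plain C stand-ins for jiffies/HZ, rounding omitted, not part of the patch):

#include <stdio.h>

#define HZ		1000UL		/* assume 1000 ticks per second */
#define BLK_MAX_TIMEOUT	(5 * HZ)

static unsigned long clamp_expiry(unsigned long now, unsigned long expiry)
{
	unsigned long maxt = now + BLK_MAX_TIMEOUT;

	return expiry > maxt ? maxt : expiry;
}

int main(void)
{
	unsigned long now = 100000UL;

	/* a 30 second deadline is pulled in to now + 5s ... */
	printf("%lu\n", clamp_expiry(now, now + 30 * HZ));	/* 105000 */
	/* ... while a 2 second deadline is left alone */
	printf("%lu\n", clamp_expiry(now, now + 2 * HZ));	/* 102000 */
	return 0;
}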
diff --git a/block/blk.h b/block/blk.h
index 79be2cbce7fd..95cab70000e3 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -9,6 +9,9 @@
 /* Number of requests a "batching" process may submit */
 #define BLK_BATCH_REQ	32
 
+/* Max future timer expiry for timeouts */
+#define BLK_MAX_TIMEOUT		(5 * HZ)
+
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
@@ -37,6 +40,7 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
 			  unsigned int *next_set);
+unsigned long blk_rq_timeout(unsigned long timeout);
 void blk_add_timer(struct request *req);
 void blk_delete_timer(struct request *);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 952e558ee598..a06ca7b5ea05 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -54,6 +54,8 @@ struct blk_mq_hw_ctx {
 	unsigned int		numa_node;
 	unsigned int		cmd_size;	/* per-request extra data */
 
+	atomic_t		nr_active;
+
 	struct blk_mq_cpu_notifier	cpu_notifier;
 	struct kobject		kobj;
 };
@@ -70,6 +72,9 @@ struct blk_mq_tag_set {
 	void			*driver_data;
 
 	struct blk_mq_tags	**tags;
+
+	struct mutex		tag_list_lock;
+	struct list_head	tag_list;
 };
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
@@ -132,8 +137,10 @@ enum {
 
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
+	BLK_MQ_F_TAG_SHARED	= 1 << 2,
 
 	BLK_MQ_S_STOPPED	= 0,
+	BLK_MQ_S_TAG_ACTIVE	= 1,
 
 	BLK_MQ_MAX_DEPTH	= 2048,
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index aa0eaa2d0bd8..d8e4cea23a25 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -190,6 +190,7 @@ enum rq_flag_bits {
 	__REQ_PM,		/* runtime pm request */
 	__REQ_END,		/* last of chain of requests */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
+	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -243,5 +244,6 @@ enum rq_flag_bits {
 #define REQ_PM			(1ULL << __REQ_PM)
 #define REQ_END			(1ULL << __REQ_END)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
+#define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94b27210641b..6bc011a09e82 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -481,6 +481,9 @@ struct request_queue {
 	wait_queue_head_t	mq_freeze_wq;
 	struct percpu_counter	mq_usage_counter;
 	struct list_head	all_q_node;
+
+	struct blk_mq_tag_set	*tag_set;
+	struct list_head	tag_set_list;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */