summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
author: Jens Axboe <axboe@fb.com> 2014-05-20 13:49:02 -0400
committer: Jens Axboe <axboe@fb.com> 2014-05-20 13:49:02 -0400
commit: e3a2b3f931f59d5284abd13faf8bded726884ffd (patch)
tree: f5426a4745996e95afc2f01f826e846710929dc2 /block
parent: 64b14519e5913e8d4de9f2e5d9ef59abba3ed83d (diff)
blk-mq: allow changing of queue depth through sysfs
For request_fn based devices, the block layer exports a 'nr_requests' file through sysfs to allow adjusting of queue depth on the fly. Currently this returns -EINVAL for blk-mq, since it's not wired up. Wire this up for blk-mq, so that it now also allows dynamic adjustments of the allowed queue depth for any given block device managed by blk-mq. Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block')
-rw-r--r-- block/blk-core.c 41
-rw-r--r-- block/blk-mq-tag.c 80
-rw-r--r-- block/blk-mq-tag.h 1
-rw-r--r-- block/blk-mq.c 22
-rw-r--r-- block/blk-mq.h 1
-rw-r--r-- block/blk-sysfs.c 45
-rw-r--r-- block/blk.h 2
7 files changed, 133 insertions, 59 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index a6bd3e702201..fe81e19099a1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
848 __freed_request(rl, sync ^ 1); 848 __freed_request(rl, sync ^ 1);
849} 849}
850 850
851int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
852{
853 struct request_list *rl;
854
855 spin_lock_irq(q->queue_lock);
856 q->nr_requests = nr;
857 blk_queue_congestion_threshold(q);
858
859 /* congestion isn't cgroup aware and follows root blkcg for now */
860 rl = &q->root_rl;
861
862 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
863 blk_set_queue_congested(q, BLK_RW_SYNC);
864 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
865 blk_clear_queue_congested(q, BLK_RW_SYNC);
866
867 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
868 blk_set_queue_congested(q, BLK_RW_ASYNC);
869 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
870 blk_clear_queue_congested(q, BLK_RW_ASYNC);
871
872 blk_queue_for_each_rl(rl, q) {
873 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
874 blk_set_rl_full(rl, BLK_RW_SYNC);
875 } else {
876 blk_clear_rl_full(rl, BLK_RW_SYNC);
877 wake_up(&rl->wait[BLK_RW_SYNC]);
878 }
879
880 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
881 blk_set_rl_full(rl, BLK_RW_ASYNC);
882 } else {
883 blk_clear_rl_full(rl, BLK_RW_ASYNC);
884 wake_up(&rl->wait[BLK_RW_ASYNC]);
885 }
886 }
887
888 spin_unlock_irq(q->queue_lock);
889 return 0;
890}
891
851/* 892/*
852 * Determine if elevator data should be initialized when allocating the 893 * Determine if elevator data should be initialized when allocating the
853 * request associated with @bio. 894 * request associated with @bio.
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e6b3fbae9862..f6dea968b710 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
57} 57}
58 58
59/* 59/*
60 * If a previously busy queue goes inactive, potential waiters could now 60 * Wakeup all potentially sleeping on normal (non-reserved) tags
61 * be allowed to queue. Wake them up and check.
62 */ 61 */
63void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) 62static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
64{ 63{
65 struct blk_mq_tags *tags = hctx->tags;
66 struct blk_mq_bitmap_tags *bt; 64 struct blk_mq_bitmap_tags *bt;
67 int i, wake_index; 65 int i, wake_index;
68 66
69 if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
70 return;
71
72 atomic_dec(&tags->active_queues);
73
74 /*
75 * Will only throttle depth on non-reserved tags
76 */
77 bt = &tags->bitmap_tags; 67 bt = &tags->bitmap_tags;
78 wake_index = bt->wake_index; 68 wake_index = bt->wake_index;
79 for (i = 0; i < BT_WAIT_QUEUES; i++) { 69 for (i = 0; i < BT_WAIT_QUEUES; i++) {
@@ -87,6 +77,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
87} 77}
88 78
89/* 79/*
80 * If a previously busy queue goes inactive, potential waiters could now
81 * be allowed to queue. Wake them up and check.
82 */
83void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
84{
85 struct blk_mq_tags *tags = hctx->tags;
86
87 if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
88 return;
89
90 atomic_dec(&tags->active_queues);
91
92 blk_mq_tag_wakeup_all(tags);
93}
94
95/*
90 * For shared tag users, we track the number of currently active users 96 * For shared tag users, we track the number of currently active users
91 * and attempt to provide a fair share of the tag depth for each of them. 97 * and attempt to provide a fair share of the tag depth for each of them.
92 */ 98 */
@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
408 return bt->depth - used; 414 return bt->depth - used;
409} 415}
410 416
417static void bt_update_count(struct blk_mq_bitmap_tags *bt,
418 unsigned int depth)
419{
420 unsigned int tags_per_word = 1U << bt->bits_per_word;
421 unsigned int map_depth = depth;
422
423 if (depth) {
424 int i;
425
426 for (i = 0; i < bt->map_nr; i++) {
427 bt->map[i].depth = min(map_depth, tags_per_word);
428 map_depth -= bt->map[i].depth;
429 }
430 }
431
432 bt->wake_cnt = BT_WAIT_BATCH;
433 if (bt->wake_cnt > depth / 4)
434 bt->wake_cnt = max(1U, depth / 4);
435
436 bt->depth = depth;
437}
438
411static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, 439static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
412 int node, bool reserved) 440 int node, bool reserved)
413{ 441{
@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
420 * condition. 448 * condition.
421 */ 449 */
422 if (depth) { 450 if (depth) {
423 unsigned int nr, i, map_depth, tags_per_word; 451 unsigned int nr, tags_per_word;
424 452
425 tags_per_word = (1 << bt->bits_per_word); 453 tags_per_word = (1 << bt->bits_per_word);
426 454
@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
444 return -ENOMEM; 472 return -ENOMEM;
445 473
446 bt->map_nr = nr; 474 bt->map_nr = nr;
447 map_depth = depth;
448 for (i = 0; i < nr; i++) {
449 bt->map[i].depth = min(map_depth, tags_per_word);
450 map_depth -= tags_per_word;
451 }
452 } 475 }
453 476
454 bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); 477 bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
460 for (i = 0; i < BT_WAIT_QUEUES; i++) 483 for (i = 0; i < BT_WAIT_QUEUES; i++)
461 init_waitqueue_head(&bt->bs[i].wait); 484 init_waitqueue_head(&bt->bs[i].wait);
462 485
463 bt->wake_cnt = BT_WAIT_BATCH; 486 bt_update_count(bt, depth);
464 if (bt->wake_cnt > depth / 4)
465 bt->wake_cnt = max(1U, depth / 4);
466
467 bt->depth = depth;
468 return 0; 487 return 0;
469} 488}
470 489
@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
525 *tag = prandom_u32() % depth; 544 *tag = prandom_u32() % depth;
526} 545}
527 546
547int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
548{
549 tdepth -= tags->nr_reserved_tags;
550 if (tdepth > tags->nr_tags)
551 return -EINVAL;
552
553 /*
554 * Don't need (or can't) update reserved tags here, they remain
555 * static and should never need resizing.
556 */
557 bt_update_count(&tags->bitmap_tags, tdepth);
558 blk_mq_tag_wakeup_all(tags);
559 return 0;
560}
561
528ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) 562ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
529{ 563{
530 char *orig_page = page; 564 char *orig_page = page;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index e144f68ec45f..e7ff5ceeeb97 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
55extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); 55extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
56extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); 56extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
57extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); 57extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
58extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
58 59
59enum { 60enum {
60 BLK_MQ_TAG_CACHE_MIN = 1, 61 BLK_MQ_TAG_CACHE_MIN = 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0fbef7e9bef1..7b71ab1b1536 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
1789} 1789}
1790EXPORT_SYMBOL(blk_mq_free_tag_set); 1790EXPORT_SYMBOL(blk_mq_free_tag_set);
1791 1791
1792int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
1793{
1794 struct blk_mq_tag_set *set = q->tag_set;
1795 struct blk_mq_hw_ctx *hctx;
1796 int i, ret;
1797
1798 if (!set || nr > set->queue_depth)
1799 return -EINVAL;
1800
1801 ret = 0;
1802 queue_for_each_hw_ctx(q, hctx, i) {
1803 ret = blk_mq_tag_update_depth(hctx->tags, nr);
1804 if (ret)
1805 break;
1806 }
1807
1808 if (!ret)
1809 q->nr_requests = nr;
1810
1811 return ret;
1812}
1813
1792void blk_mq_disable_hotplug(void) 1814void blk_mq_disable_hotplug(void)
1793{ 1815{
1794 mutex_lock(&all_q_mutex); 1816 mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5e5a378962b7..7db4fe4bd002 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
32void blk_mq_free_queue(struct request_queue *q); 32void blk_mq_free_queue(struct request_queue *q);
33void blk_mq_clone_flush_request(struct request *flush_rq, 33void blk_mq_clone_flush_request(struct request *flush_rq,
34 struct request *orig_rq); 34 struct request *orig_rq);
35int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
35 36
36/* 37/*
37 * CPU hotplug helpers 38 * CPU hotplug helpers
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7500f876dae4..4d6811ac13fd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
48static ssize_t 48static ssize_t
49queue_requests_store(struct request_queue *q, const char *page, size_t count) 49queue_requests_store(struct request_queue *q, const char *page, size_t count)
50{ 50{
51 struct request_list *rl;
52 unsigned long nr; 51 unsigned long nr;
53 int ret; 52 int ret, err;
54 53
55 if (!q->request_fn) 54 if (!q->request_fn && !q->mq_ops)
56 return -EINVAL; 55 return -EINVAL;
57 56
58 ret = queue_var_store(&nr, page, count); 57 ret = queue_var_store(&nr, page, count);
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
62 if (nr < BLKDEV_MIN_RQ) 61 if (nr < BLKDEV_MIN_RQ)
63 nr = BLKDEV_MIN_RQ; 62 nr = BLKDEV_MIN_RQ;
64 63
65 spin_lock_irq(q->queue_lock); 64 if (q->request_fn)
66 q->nr_requests = nr; 65 err = blk_update_nr_requests(q, nr);
67 blk_queue_congestion_threshold(q); 66 else
68 67 err = blk_mq_update_nr_requests(q, nr);
69 /* congestion isn't cgroup aware and follows root blkcg for now */ 68
70 rl = &q->root_rl; 69 if (err)
71 70 return err;
72 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
73 blk_set_queue_congested(q, BLK_RW_SYNC);
74 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
75 blk_clear_queue_congested(q, BLK_RW_SYNC);
76
77 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
78 blk_set_queue_congested(q, BLK_RW_ASYNC);
79 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
80 blk_clear_queue_congested(q, BLK_RW_ASYNC);
81
82 blk_queue_for_each_rl(rl, q) {
83 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
84 blk_set_rl_full(rl, BLK_RW_SYNC);
85 } else {
86 blk_clear_rl_full(rl, BLK_RW_SYNC);
87 wake_up(&rl->wait[BLK_RW_SYNC]);
88 }
89
90 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
91 blk_set_rl_full(rl, BLK_RW_ASYNC);
92 } else {
93 blk_clear_rl_full(rl, BLK_RW_ASYNC);
94 wake_up(&rl->wait[BLK_RW_ASYNC]);
95 }
96 }
97 71
98 spin_unlock_irq(q->queue_lock);
99 return ret; 72 return ret;
100} 73}
101 74
diff --git a/block/blk.h b/block/blk.h
index 95cab70000e3..45385e9abf6f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
188 return q->nr_congestion_off; 188 return q->nr_congestion_off;
189} 189}
190 190
191extern int blk_update_nr_requests(struct request_queue *, unsigned int);
192
191/* 193/*
192 * Contribute to IO statistics IFF: 194 * Contribute to IO statistics IFF:
193 * 195 *