author     Jens Axboe <axboe@fb.com>    2014-05-20 13:49:02 -0400
committer  Jens Axboe <axboe@fb.com>    2014-05-20 13:49:02 -0400
commit     e3a2b3f931f59d5284abd13faf8bded726884ffd (patch)
tree       f5426a4745996e95afc2f01f826e846710929dc2 /block
parent     64b14519e5913e8d4de9f2e5d9ef59abba3ed83d (diff)
blk-mq: allow changing of queue depth through sysfs
For request_fn based devices, the block layer exports a 'nr_requests'
file through sysfs to allow adjusting of queue depth on the fly.
Currently this returns -EINVAL for blk-mq, since it's not wired up.
Wire this up for blk-mq, so that it now also allows dynamic
adjustments of the allowed queue depth for any given block device
managed by blk-mq.
Signed-off-by: Jens Axboe <axboe@fb.com>
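
With this in place, the queue depth of a blk-mq managed device can be tuned at
runtime through the same sysfs attribute that request_fn devices already
expose, normally /sys/block/<dev>/queue/nr_requests. Below is a minimal
userspace sketch of driving that attribute; it is not part of the patch, and
the device name "vda" and the depth of 64 are purely illustrative.

#include <stdio.h>
#include <stdlib.h>

static int set_nr_requests(const char *dev, unsigned int nr)
{
	char path[256];
	FILE *f;

	/* Queue attributes live under /sys/block/<dev>/queue/ on a standard sysfs mount. */
	snprintf(path, sizeof(path), "/sys/block/%s/queue/nr_requests", dev);

	f = fopen(path, "w");
	if (!f)
		return -1;

	/* The store handler clamps values below BLKDEV_MIN_RQ and, for blk-mq,
	 * rejects anything above the tag set's queue_depth with -EINVAL,
	 * so the write can legitimately fail. */
	if (fprintf(f, "%u\n", nr) < 0) {
		fclose(f);
		return -1;
	}

	return fclose(f);
}

int main(void)
{
	/* "vda" and 64 are illustrative only; use a real blk-mq device and depth. */
	return set_nr_requests("vda", 64) ? EXIT_FAILURE : EXIT_SUCCESS;
}
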
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c   | 41
-rw-r--r--  block/blk-mq-tag.c | 80
-rw-r--r--  block/blk-mq-tag.h |  1
-rw-r--r--  block/blk-mq.c     | 22
-rw-r--r--  block/blk-mq.h     |  1
-rw-r--r--  block/blk-sysfs.c  | 45
-rw-r--r--  block/blk.h        |  2
7 files changed, 133 insertions, 59 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index a6bd3e702201..fe81e19099a1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
 		__freed_request(rl, sync ^ 1);
 }
 
+int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct request_list *rl;
+
+	spin_lock_irq(q->queue_lock);
+	q->nr_requests = nr;
+	blk_queue_congestion_threshold(q);
+
+	/* congestion isn't cgroup aware and follows root blkcg for now */
+	rl = &q->root_rl;
+
+	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_SYNC);
+	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_ASYNC);
+	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+	blk_queue_for_each_rl(rl, q) {
+		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_SYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_SYNC);
+			wake_up(&rl->wait[BLK_RW_SYNC]);
+		}
+
+		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_ASYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_ASYNC);
+			wake_up(&rl->wait[BLK_RW_ASYNC]);
+		}
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	return 0;
+}
+
 /*
  * Determine if elevator data should be initialized when allocating the
  * request associated with @bio.
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e6b3fbae9862..f6dea968b710 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * If a previously busy queue goes inactive, potential waiters could now
- * be allowed to queue. Wake them up and check.
+ * Wakeup all potentially sleeping on normal (non-reserved) tags
  */
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 {
-	struct blk_mq_tags *tags = hctx->tags;
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
 
-	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-		return;
-
-	atomic_dec(&tags->active_queues);
-
-	/*
-	 * Will only throttle depth on non-reserved tags
-	 */
 	bt = &tags->bitmap_tags;
 	wake_index = bt->wake_index;
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
@@ -87,6 +77,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	blk_mq_tag_wakeup_all(tags);
+}
+
+/*
  * For shared tag users, we track the number of currently active users
  * and attempt to provide a fair share of the tag depth for each of them.
  */
@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
 	return bt->depth - used;
 }
 
+static void bt_update_count(struct blk_mq_bitmap_tags *bt,
+			    unsigned int depth)
+{
+	unsigned int tags_per_word = 1U << bt->bits_per_word;
+	unsigned int map_depth = depth;
+
+	if (depth) {
+		int i;
+
+		for (i = 0; i < bt->map_nr; i++) {
+			bt->map[i].depth = min(map_depth, tags_per_word);
+			map_depth -= bt->map[i].depth;
+		}
+	}
+
+	bt->wake_cnt = BT_WAIT_BATCH;
+	if (bt->wake_cnt > depth / 4)
+		bt->wake_cnt = max(1U, depth / 4);
+
+	bt->depth = depth;
+}
+
 static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 		    int node, bool reserved)
 {
@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 	 * condition.
 	 */
 	if (depth) {
-		unsigned int nr, i, map_depth, tags_per_word;
+		unsigned int nr, tags_per_word;
 
 		tags_per_word = (1 << bt->bits_per_word);
 
@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 			return -ENOMEM;
 
 		bt->map_nr = nr;
-		map_depth = depth;
-		for (i = 0; i < nr; i++) {
-			bt->map[i].depth = min(map_depth, tags_per_word);
-			map_depth -= tags_per_word;
-		}
 	}
 
 	bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 	for (i = 0; i < BT_WAIT_QUEUES; i++)
 		init_waitqueue_head(&bt->bs[i].wait);
 
-	bt->wake_cnt = BT_WAIT_BATCH;
-	if (bt->wake_cnt > depth / 4)
-		bt->wake_cnt = max(1U, depth / 4);
-
-	bt->depth = depth;
+	bt_update_count(bt, depth);
 	return 0;
 }
 
@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
 		*tag = prandom_u32() % depth;
 }
 
+int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
+{
+	tdepth -= tags->nr_reserved_tags;
+	if (tdepth > tags->nr_tags)
+		return -EINVAL;
+
+	/*
+	 * Don't need (or can't) update reserved tags here, they remain
+	 * static and should never need resizing.
+	 */
+	bt_update_count(&tags->bitmap_tags, tdepth);
+	blk_mq_tag_wakeup_all(tags);
+	return 0;
+}
+
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
 	char *orig_page = page;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index e144f68ec45f..e7ff5ceeeb97 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
+extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0fbef7e9bef1..7b71ab1b1536 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_hw_ctx *hctx;
+	int i, ret;
+
+	if (!set || nr > set->queue_depth)
+		return -EINVAL;
+
+	ret = 0;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_tag_update_depth(hctx->tags, nr);
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		q->nr_requests = nr;
+
+	return ret;
+}
+
 void blk_mq_disable_hotplug(void)
 {
 	mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5e5a378962b7..7db4fe4bd002 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 
 /*
  * CPU hotplug helpers
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7500f876dae4..4d6811ac13fd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
 static ssize_t
 queue_requests_store(struct request_queue *q, const char *page, size_t count)
 {
-	struct request_list *rl;
 	unsigned long nr;
-	int ret;
+	int ret, err;
 
-	if (!q->request_fn)
+	if (!q->request_fn && !q->mq_ops)
 		return -EINVAL;
 
 	ret = queue_var_store(&nr, page, count);
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 	if (nr < BLKDEV_MIN_RQ)
 		nr = BLKDEV_MIN_RQ;
 
-	spin_lock_irq(q->queue_lock);
-	q->nr_requests = nr;
-	blk_queue_congestion_threshold(q);
-
-	/* congestion isn't cgroup aware and follows root blkcg for now */
-	rl = &q->root_rl;
-
-	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
-		blk_set_queue_congested(q, BLK_RW_SYNC);
-	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, BLK_RW_SYNC);
-
-	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
-		blk_set_queue_congested(q, BLK_RW_ASYNC);
-	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, BLK_RW_ASYNC);
-
-	blk_queue_for_each_rl(rl, q) {
-		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
-			blk_set_rl_full(rl, BLK_RW_SYNC);
-		} else {
-			blk_clear_rl_full(rl, BLK_RW_SYNC);
-			wake_up(&rl->wait[BLK_RW_SYNC]);
-		}
-
-		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
-			blk_set_rl_full(rl, BLK_RW_ASYNC);
-		} else {
-			blk_clear_rl_full(rl, BLK_RW_ASYNC);
-			wake_up(&rl->wait[BLK_RW_ASYNC]);
-		}
-	}
+	if (q->request_fn)
+		err = blk_update_nr_requests(q, nr);
+	else
+		err = blk_mq_update_nr_requests(q, nr);
+
+	if (err)
+		return err;
 
-	spin_unlock_irq(q->queue_lock);
 	return ret;
 }
 
diff --git a/block/blk.h b/block/blk.h
index 95cab70000e3..45385e9abf6f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
 	return q->nr_congestion_off;
 }
 
+extern int blk_update_nr_requests(struct request_queue *, unsigned int);
+
 /*
  * Contribute to IO statistics IFF:
  *
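
As a side note on the mechanics above: bt_update_count() re-spreads the
requested depth across the bitmap words, capping each word at
2^bits_per_word tags, and recomputes the wake batch as
min(BT_WAIT_BATCH, max(1, depth / 4)). The standalone sketch below mirrors
that distribution in plain C with simplified stand-in types rather than the
real blk-mq structures; the BT_WAIT_BATCH value is assumed for illustration.

#include <stdio.h>

#define BT_WAIT_BATCH	8	/* assumed value, for illustration only */

struct word {
	unsigned int depth;
};

/*
 * Follows the depth != 0 path of bt_update_count(): hand each bitmap word
 * up to tags_per_word tags until the requested depth is used up, then
 * recompute the wake batch.
 */
static void update_count(struct word *map, int map_nr,
			 unsigned int bits_per_word, unsigned int depth)
{
	unsigned int tags_per_word = 1U << bits_per_word;
	unsigned int map_depth = depth;
	unsigned int wake_cnt;
	int i;

	for (i = 0; i < map_nr; i++) {
		map[i].depth = map_depth < tags_per_word ? map_depth : tags_per_word;
		map_depth -= map[i].depth;
	}

	wake_cnt = BT_WAIT_BATCH;
	if (wake_cnt > depth / 4)
		wake_cnt = depth / 4 ? depth / 4 : 1;

	printf("depth=%u wake_cnt=%u\n", depth, wake_cnt);
	for (i = 0; i < map_nr; i++)
		printf("  word %d: %u tags\n", i, map[i].depth);
}

int main(void)
{
	struct word map[4] = { { 0 } };

	/* 4 words of up to 32 tags each, resized to a depth of 42:
	 * word 0 gets 32 tags, word 1 gets 10, the rest get 0. */
	update_count(map, 4, 5, 42);
	return 0;
}
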