author		Josef Bacik <jbacik@fb.com>	2018-07-03 11:32:35 -0400
committer	Jens Axboe <axboe@kernel.dk>	2018-07-09 11:07:54 -0400
commit		a79050434b45959f397042080fd1d70ffa9bd9df (patch)
tree		d5689153d497925d326a8b7e9963f4c3f88685ea
parent		2ecbf456352d0699f51b4c6d70ea5bf29766579c (diff)
blk-rq-qos: refactor out common elements of blk-wbt
blkcg-qos is going to do essentially what wbt does, only on a cgroup
basis. Break out the common code that will be shared between blkcg-qos
and wbt into blk-rq-qos.* so they can both utilize the same
infrastructure.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
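To make the new abstraction concrete, here is a minimal sketch (not part of
this patch) of how a throttling policy plugs into the rq_qos chain the patch
introduces. Only struct rq_qos, struct rq_qos_ops, rq_qos_add() and the
RQ_QOS_* ids come from the patch itself; the no-op policy and every noop_*
name are hypothetical.

/* Hypothetical example policy; everything named noop_* is illustrative. */
#include <linux/slab.h>
#include "blk-rq-qos.h"

struct noop_qos {
	struct rq_qos rqos;		/* embedded, recovered via container_of() */
	atomic_t completions;
};

static void noop_done(struct rq_qos *rqos, struct request *rq)
{
	struct noop_qos *nq = container_of(rqos, struct noop_qos, rqos);

	atomic_inc(&nq->completions);	/* reached from rq_qos_done() */
}

static void noop_exit(struct rq_qos *rqos)
{
	/* rq_qos_exit() has already unlinked us from q->rq_qos */
	kfree(container_of(rqos, struct noop_qos, rqos));
}

static struct rq_qos_ops noop_qos_ops = {
	.done = noop_done,
	.exit = noop_exit,
};

int noop_qos_init(struct request_queue *q)
{
	struct noop_qos *nq = kzalloc(sizeof(*nq), GFP_KERNEL);

	if (!nq)
		return -ENOMEM;
	nq->rqos.id = RQ_QOS_CGROUP;	/* the second id this patch defines */
	nq->rqos.ops = &noop_qos_ops;
	nq->rqos.q = q;
	rq_qos_add(q, &nq->rqos);	/* now rq_qos_done() et al. reach us */
	return 0;
}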
-rw-r--r--  block/Makefile           |   2
-rw-r--r--  block/blk-core.c         |  12
-rw-r--r--  block/blk-mq.c           |  12
-rw-r--r--  block/blk-rq-qos.c       | 178
-rw-r--r--  block/blk-rq-qos.h       | 106
-rw-r--r--  block/blk-settings.c     |   4
-rw-r--r--  block/blk-sysfs.c        |  22
-rw-r--r--  block/blk-wbt.c          | 326
-rw-r--r--  block/blk-wbt.h          |  63
-rw-r--r--  include/linux/blkdev.h   |   4
10 files changed, 478 insertions(+), 251 deletions(-)
diff --git a/block/Makefile b/block/Makefile
index a8f94cdb75c3..57d0f47ab05f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o partition-generic.o ioprio.o \
-			badblocks.o partitions/
+			badblocks.o partitions/ blk-rq-qos.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 2ff8e131a892..b33a73bcf2d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1645,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1752,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, req);
+	rq_qos_done(q, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -2044,7 +2044,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
-	wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+	wb_acct = rq_qos_throttle(q, bio, q->queue_lock);
 
 	/*
 	 * Grab a free request. This might sleep but can not fail.
@@ -2054,7 +2054,7 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, wb_acct);
 		if (PTR_ERR(req) == -ENOMEM)
 			bio->bi_status = BLK_STS_RESOURCE;
 		else
@@ -2983,7 +2983,7 @@ void blk_start_request(struct request *req)
 		req->throtl_size = blk_rq_sectors(req);
 #endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, req);
+		rq_qos_issue(req->q, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3207,7 +3207,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
 	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, req);
+		rq_qos_done(q, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 850fdd02c385..ea2a226457fa 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -504,7 +504,7 @@ void blk_mq_free_request(struct request *rq)
 	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
 		laptop_io_completion(q->backing_dev_info);
 
-	wbt_done(q->rq_wb, rq);
+	rq_qos_done(q, rq);
 
 	if (blk_rq_rl(rq))
 		blk_put_rl(blk_rq_rl(rq));
@@ -527,7 +527,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 	blk_account_io_done(rq, now);
 
 	if (rq->end_io) {
-		wbt_done(rq->q->rq_wb, rq);
+		rq_qos_done(rq->q, rq);
 		rq->end_io(rq, error);
 	} else {
 		if (unlikely(blk_bidi_rq(rq)))
@@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq)
 		rq->throtl_size = blk_rq_sectors(rq);
 #endif
 		rq->rq_flags |= RQF_STATS;
-		wbt_issue(q->rq_wb, rq);
+		rq_qos_issue(q, rq);
 	}
 
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
@@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq)
 	blk_mq_put_driver_tag(rq);
 
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (blk_mq_request_started(rq)) {
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
@@ -1806,13 +1806,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	if (blk_mq_sched_bio_merge(q, bio))
 		return BLK_QC_T_NONE;
 
-	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+	wb_acct = rq_qos_throttle(q, bio, NULL);
 
 	trace_block_getrq(q, bio, bio->bi_opf);
 
 	rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, wb_acct);
 		if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
 		return BLK_QC_T_NONE;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
new file mode 100644
index 000000000000..d2f2af8aa10c
--- /dev/null
+++ b/block/blk-rq-qos.c
@@ -0,0 +1,178 @@
+#include "blk-rq-qos.h"
+
+#include "blk-wbt.h"
+
+/*
+ * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
+ * false if 'v' + 1 would be bigger than 'below'.
+ */
+static bool atomic_inc_below(atomic_t *v, int below)
+{
+	int cur = atomic_read(v);
+
+	for (;;) {
+		int old;
+
+		if (cur >= below)
+			return false;
+		old = atomic_cmpxchg(v, cur, cur + 1);
+		if (old == cur)
+			break;
+		cur = old;
+	}
+
+	return true;
+}
+
+bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit)
+{
+	return atomic_inc_below(&rq_wait->inflight, limit);
+}
+
+void rq_qos_cleanup(struct request_queue *q, enum wbt_flags wb_acct)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->cleanup)
+			rqos->ops->cleanup(rqos, wb_acct);
+	}
+}
+
+void rq_qos_done(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->done)
+			rqos->ops->done(rqos, rq);
+	}
+}
+
+void rq_qos_issue(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->issue)
+			rqos->ops->issue(rqos, rq);
+	}
+}
+
+void rq_qos_requeue(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->requeue)
+			rqos->ops->requeue(rqos, rq);
+	}
+}
+
+enum wbt_flags rq_qos_throttle(struct request_queue *q, struct bio *bio,
+			       spinlock_t *lock)
+{
+	struct rq_qos *rqos;
+	enum wbt_flags flags = 0;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->throttle)
+			flags |= rqos->ops->throttle(rqos, bio, lock);
+	}
+	return flags;
+}
+
+/*
+ * Return true, if we can't increase the depth further by scaling
+ */
+bool rq_depth_calc_max_depth(struct rq_depth *rqd)
+{
+	unsigned int depth;
+	bool ret = false;
+
+	/*
+	 * For QD=1 devices, this is a special case. It's important for those
+	 * to have one request ready when one completes, so force a depth of
+	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
+	 * since the device can't have more than that in flight. If we're
+	 * scaling down, then keep a setting of 1/1/1.
+	 */
+	if (rqd->queue_depth == 1) {
+		if (rqd->scale_step > 0)
+			rqd->max_depth = 1;
+		else {
+			rqd->max_depth = 2;
+			ret = true;
+		}
+	} else {
+		/*
+		 * scale_step == 0 is our default state. If we have suffered
+		 * latency spikes, step will be > 0, and we shrink the
+		 * allowed write depths. If step is < 0, we're only doing
+		 * writes, and we allow a temporarily higher depth to
+		 * increase performance.
+		 */
+		depth = min_t(unsigned int, rqd->default_depth,
+			      rqd->queue_depth);
+		if (rqd->scale_step > 0)
+			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
+		else if (rqd->scale_step < 0) {
+			unsigned int maxd = 3 * rqd->queue_depth / 4;
+
+			depth = 1 + ((depth - 1) << -rqd->scale_step);
+			if (depth > maxd) {
+				depth = maxd;
+				ret = true;
+			}
+		}
+
+		rqd->max_depth = depth;
+	}
+
+	return ret;
+}
+
+void rq_depth_scale_up(struct rq_depth *rqd)
+{
+	/*
+	 * Hit max in previous round, stop here
+	 */
+	if (rqd->scaled_max)
+		return;
+
+	rqd->scale_step--;
+
+	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+}
+
+/*
+ * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
+ * had a latency violation.
+ */
+void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+{
+	/*
+	 * Stop scaling down when we've hit the limit. This also prevents
+	 * ->scale_step from going to crazy values, if the device can't
+	 * keep up.
+	 */
+	if (rqd->max_depth == 1)
+		return;
+
+	if (rqd->scale_step < 0 && hard_throttle)
+		rqd->scale_step = 0;
+	else
+		rqd->scale_step++;
+
+	rqd->scaled_max = false;
+	rq_depth_calc_max_depth(rqd);
+}
+
+void rq_qos_exit(struct request_queue *q)
+{
+	while (q->rq_qos) {
+		struct rq_qos *rqos = q->rq_qos;
+		q->rq_qos = rqos->next;
+		rqos->ops->exit(rqos);
+	}
+}
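As a sanity check on the arithmetic in rq_depth_calc_max_depth() above, here
is a stand-alone user-space sketch of the same scaling formula. The values
are assumptions for illustration (default_depth = 64 as an RWB_DEF_DEPTH
stand-in, queue_depth = 128); it is not code from the patch.

/* User-space sketch of the rq_depth_calc_max_depth() scaling math. */
#include <stdio.h>

static unsigned int calc_depth(int scale_step, unsigned int default_depth,
			       unsigned int queue_depth)
{
	unsigned int depth = default_depth < queue_depth ? default_depth
							 : queue_depth;

	if (scale_step > 0) {
		/* each positive step roughly halves the allowed depth */
		depth = 1 + ((depth - 1) >> (scale_step < 31 ? scale_step : 31));
	} else if (scale_step < 0) {
		unsigned int maxd = 3 * queue_depth / 4;

		depth = 1 + ((depth - 1) << -scale_step);
		if (depth > maxd)
			depth = maxd;	/* capped: can't scale up further */
	}
	return depth;
}

int main(void)
{
	int step;

	/* Prints 96 96 64 32 16 8 for the assumed 64/128 configuration. */
	for (step = -2; step <= 3; step++)
		printf("scale_step=%2d -> max_depth=%u\n",
		       step, calc_depth(step, 64, 128));
	return 0;
}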
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
new file mode 100644
index 000000000000..f9a39bd6ece3
--- /dev/null
+++ b/block/blk-rq-qos.h
@@ -0,0 +1,106 @@
+#ifndef RQ_QOS_H
+#define RQ_QOS_H
+
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/blk_types.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+
+enum rq_qos_id {
+	RQ_QOS_WBT,
+	RQ_QOS_CGROUP,
+};
+
+struct rq_wait {
+	wait_queue_head_t wait;
+	atomic_t inflight;
+};
+
+struct rq_qos {
+	struct rq_qos_ops *ops;
+	struct request_queue *q;
+	enum rq_qos_id id;
+	struct rq_qos *next;
+};
+
+struct rq_qos_ops {
+	enum wbt_flags (*throttle)(struct rq_qos *, struct bio *,
+				   spinlock_t *);
+	void (*issue)(struct rq_qos *, struct request *);
+	void (*requeue)(struct rq_qos *, struct request *);
+	void (*done)(struct rq_qos *, struct request *);
+	void (*cleanup)(struct rq_qos *, enum wbt_flags);
+	void (*exit)(struct rq_qos *);
+};
+
+struct rq_depth {
+	unsigned int max_depth;
+
+	int scale_step;
+	bool scaled_max;
+
+	unsigned int queue_depth;
+	unsigned int default_depth;
+};
+
+static inline struct rq_qos *rq_qos_id(struct request_queue *q,
+				       enum rq_qos_id id)
+{
+	struct rq_qos *rqos;
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->id == id)
+			break;
+	}
+	return rqos;
+}
+
+static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
+{
+	return rq_qos_id(q, RQ_QOS_WBT);
+}
+
+static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
+{
+	return rq_qos_id(q, RQ_QOS_CGROUP);
+}
+
+static inline void rq_wait_init(struct rq_wait *rq_wait)
+{
+	atomic_set(&rq_wait->inflight, 0);
+	init_waitqueue_head(&rq_wait->wait);
+}
+
+static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+{
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+}
+
+static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+{
+	struct rq_qos *cur, *prev = NULL;
+	for (cur = q->rq_qos; cur; cur = cur->next) {
+		if (cur == rqos) {
+			if (prev)
+				prev->next = rqos->next;
+			else
+				q->rq_qos = rqos->next;
+			break;
+		}
+		prev = cur;
+	}
+}
+
+bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit);
+void rq_depth_scale_up(struct rq_depth *rqd);
+void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_calc_max_depth(struct rq_depth *rqd);
+
+void rq_qos_cleanup(struct request_queue *, enum wbt_flags);
+void rq_qos_done(struct request_queue *, struct request *);
+void rq_qos_issue(struct request_queue *, struct request *);
+void rq_qos_requeue(struct request_queue *, struct request *);
+enum wbt_flags rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *);
+void rq_qos_exit(struct request_queue *);
+#endif
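For orientation, a hedged sketch of how a policy's ->throttle hook might
consume the rq_wait primitives declared above. It mirrors the
may_queue()/__wbt_wait() pattern in blk-wbt.c; my_throttle and my_limit are
illustrative names, not APIs added by this patch.

/* Illustrative only; mirrors blk-wbt's wait loop on struct rq_wait. */
static void my_throttle(struct rq_wait *rqw, unsigned int my_limit,
			spinlock_t *lock)
{
	DEFINE_WAIT(wait);

	if (rq_wait_inc_below(rqw, my_limit))
		return;			/* fast path: took an inflight slot */

	do {
		prepare_to_wait_exclusive(&rqw->wait, &wait,
					  TASK_UNINTERRUPTIBLE);
		if (rq_wait_inc_below(rqw, my_limit))
			break;
		if (lock) {
			/* drop the queue lock across the sleep */
			spin_unlock_irq(lock);
			io_schedule();
			spin_lock_irq(lock);
		} else
			io_schedule();
	} while (1);

	finish_wait(&rqw->wait, &wait);
}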
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d1de71124656..053de87d1fda 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -875,7 +875,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 	q->queue_depth = depth;
-	wbt_set_queue_depth(q->rq_wb, depth);
+	wbt_set_queue_depth(q, depth);
 }
 EXPORT_SYMBOL(blk_set_queue_depth);
 
@@ -900,7 +900,7 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 		queue_flag_clear(QUEUE_FLAG_FUA, q);
 	spin_unlock_irq(q->queue_lock);
 
-	wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 94987b1f69e1..49c29a5d06bb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -422,16 +422,16 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
 {
-	if (!q->rq_wb)
+	if (!wbt_rq_qos(q))
 		return -EINVAL;
 
-	return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
+	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
 }
 
 static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 				  size_t count)
 {
-	struct rq_wb *rwb;
+	struct rq_qos *rqos;
 	ssize_t ret;
 	s64 val;
 
@@ -441,23 +441,21 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 	if (val < -1)
 		return -EINVAL;
 
-	rwb = q->rq_wb;
-	if (!rwb) {
+	rqos = wbt_rq_qos(q);
+	if (!rqos) {
 		ret = wbt_init(q);
 		if (ret)
 			return ret;
 	}
 
-	rwb = q->rq_wb;
 	if (val == -1)
-		rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+		val = wbt_default_latency_nsec(q);
 	else if (val >= 0)
-		rwb->min_lat_nsec = val * 1000ULL;
+		val *= 1000ULL;
 
-	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
-		rwb->enable_state = WBT_STATE_ON_MANUAL;
+	wbt_set_min_lat(q, val);
 
-	wbt_update_limits(rwb);
+	wbt_update_limits(q);
 	return count;
 }
 
@@ -964,7 +962,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
-	wbt_exit(q);
+	rq_qos_exit(q);
 
 	mutex_lock(&q->sysfs_lock);
 	if (q->request_fn || (q->mq_ops && q->elevator))
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 4f89b28fa652..6fe20fb823e4 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -25,6 +25,7 @@
 #include <linux/swap.h>
 
 #include "blk-wbt.h"
+#include "blk-rq-qos.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
@@ -78,28 +79,6 @@ static inline bool rwb_enabled(struct rq_wb *rwb)
 	return rwb && rwb->wb_normal != 0;
 }
 
-/*
- * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
- * false if 'v' + 1 would be bigger than 'below'.
- */
-static bool atomic_inc_below(atomic_t *v, int below)
-{
-	int cur = atomic_read(v);
-
-	for (;;) {
-		int old;
-
-		if (cur >= below)
-			return false;
-		old = atomic_cmpxchg(v, cur, cur + 1);
-		if (old == cur)
-			break;
-		cur = old;
-	}
-
-	return true;
-}
-
 static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
 {
 	if (rwb_enabled(rwb)) {
@@ -116,7 +95,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
  */
 static bool wb_recent_wait(struct rq_wb *rwb)
 {
-	struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb;
+	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
 
 	return time_before(jiffies, wb->dirty_sleep + HZ);
 }
@@ -144,8 +123,9 @@ static void rwb_wake_all(struct rq_wb *rwb)
 	}
 }
 
-void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
+static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	struct rq_wait *rqw;
 	int inflight, limit;
 
@@ -194,10 +174,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
  * Called on completion of a request. Note that it's also called when
  * a request is merged, when the request gets freed.
  */
-void wbt_done(struct rq_wb *rwb, struct request *rq)
+static void wbt_done(struct rq_qos *rqos, struct request *rq)
 {
-	if (!rwb)
-		return;
+	struct rq_wb *rwb = RQWB(rqos);
 
 	if (!wbt_is_tracked(rq)) {
 		if (rwb->sync_cookie == rq) {
@@ -209,72 +188,11 @@ void wbt_done(struct rq_wb *rwb, struct request *rq)
 		wb_timestamp(rwb, &rwb->last_comp);
 	} else {
 		WARN_ON_ONCE(rq == rwb->sync_cookie);
-		__wbt_done(rwb, wbt_flags(rq));
+		__wbt_done(rqos, wbt_flags(rq));
 	}
 	wbt_clear_state(rq);
 }
 
-/*
- * Return true, if we can't increase the depth further by scaling
- */
-static bool calc_wb_limits(struct rq_wb *rwb)
-{
-	unsigned int depth;
-	bool ret = false;
-
-	if (!rwb->min_lat_nsec) {
-		rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
-		return false;
-	}
-
-	/*
-	 * For QD=1 devices, this is a special case. It's important for those
-	 * to have one request ready when one completes, so force a depth of
-	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
-	 * since the device can't have more than that in flight. If we're
-	 * scaling down, then keep a setting of 1/1/1.
-	 */
-	if (rwb->queue_depth == 1) {
-		if (rwb->scale_step > 0)
-			rwb->wb_max = rwb->wb_normal = 1;
-		else {
-			rwb->wb_max = rwb->wb_normal = 2;
-			ret = true;
-		}
-		rwb->wb_background = 1;
-	} else {
-		/*
-		 * scale_step == 0 is our default state. If we have suffered
-		 * latency spikes, step will be > 0, and we shrink the
-		 * allowed write depths. If step is < 0, we're only doing
-		 * writes, and we allow a temporarily higher depth to
-		 * increase performance.
-		 */
-		depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth);
-		if (rwb->scale_step > 0)
-			depth = 1 + ((depth - 1) >> min(31, rwb->scale_step));
-		else if (rwb->scale_step < 0) {
-			unsigned int maxd = 3 * rwb->queue_depth / 4;
-
-			depth = 1 + ((depth - 1) << -rwb->scale_step);
-			if (depth > maxd) {
-				depth = maxd;
-				ret = true;
-			}
-		}
-
-		/*
-		 * Set our max/normal/bg queue depths based on how far
-		 * we have scaled down (->scale_step).
-		 */
-		rwb->wb_max = depth;
-		rwb->wb_normal = (rwb->wb_max + 1) / 2;
-		rwb->wb_background = (rwb->wb_max + 3) / 4;
-	}
-
-	return ret;
-}
-
 static inline bool stat_sample_valid(struct blk_rq_stat *stat)
 {
 	/*
@@ -307,7 +225,8 @@ enum {
 
 static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 {
-	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
+	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+	struct rq_depth *rqd = &rwb->rq_depth;
 	u64 thislat;
 
 	/*
@@ -351,7 +270,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 		return LAT_EXCEEDED;
 	}
 
-	if (rwb->scale_step)
+	if (rqd->scale_step)
 		trace_wbt_stat(bdi, stat);
 
 	return LAT_OK;
@@ -359,58 +278,48 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 
 static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 {
-	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
+	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+	struct rq_depth *rqd = &rwb->rq_depth;
 
-	trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec,
-		       rwb->wb_background, rwb->wb_normal, rwb->wb_max);
+	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
+			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
 }
 
-static void scale_up(struct rq_wb *rwb)
+static void calc_wb_limits(struct rq_wb *rwb)
 {
-	/*
-	 * Hit max in previous round, stop here
-	 */
-	if (rwb->scaled_max)
-		return;
+	if (rwb->min_lat_nsec == 0) {
+		rwb->wb_normal = rwb->wb_background = 0;
+	} else if (rwb->rq_depth.max_depth <= 2) {
+		rwb->wb_normal = rwb->rq_depth.max_depth;
+		rwb->wb_background = 1;
+	} else {
+		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
+		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
+	}
+}
 
-	rwb->scale_step--;
+static void scale_up(struct rq_wb *rwb)
+{
+	rq_depth_scale_up(&rwb->rq_depth);
+	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
-
-	rwb->scaled_max = calc_wb_limits(rwb);
-
-	rwb_wake_all(rwb);
-
-	rwb_trace_step(rwb, "step up");
+	rwb_trace_step(rwb, "scale up");
 }
 
-/*
- * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
- */
 static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 {
-	/*
-	 * Stop scaling down when we've hit the limit. This also prevents
-	 * ->scale_step from going to crazy values, if the device can't
-	 * keep up.
-	 */
-	if (rwb->wb_max == 1)
-		return;
-
-	if (rwb->scale_step < 0 && hard_throttle)
-		rwb->scale_step = 0;
-	else
-		rwb->scale_step++;
-
-	rwb->scaled_max = false;
-	rwb->unknown_cnt = 0;
+	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
 	calc_wb_limits(rwb);
-	rwb_trace_step(rwb, "step down");
+	rwb->unknown_cnt = 0;
+	rwb_wake_all(rwb);
+	rwb_trace_step(rwb, "scale down");
 }
 
 static void rwb_arm_timer(struct rq_wb *rwb)
 {
-	if (rwb->scale_step > 0) {
+	struct rq_depth *rqd = &rwb->rq_depth;
+
+	if (rqd->scale_step > 0) {
 		/*
 		 * We should speed this up, using some variant of a fast
 		 * integer inverse square root calculation. Since we only do
@@ -418,7 +327,7 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 		 * though.
 		 */
 		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
-					int_sqrt((rwb->scale_step + 1) << 8));
+					int_sqrt((rqd->scale_step + 1) << 8));
 	} else {
 		/*
 		 * For step < 0, we don't want to increase/decrease the
@@ -433,12 +342,13 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 static void wb_timer_fn(struct blk_stat_callback *cb)
 {
 	struct rq_wb *rwb = cb->data;
+	struct rq_depth *rqd = &rwb->rq_depth;
 	unsigned int inflight = wbt_inflight(rwb);
 	int status;
 
 	status = latency_exceeded(rwb, cb->stat);
 
-	trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
+	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
 			inflight);
 
 	/*
@@ -469,9 +379,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 		 * currently don't have a valid read/write sample. For that
 		 * case, slowly return to center state (step == 0).
 		 */
-		if (rwb->scale_step > 0)
+		if (rqd->scale_step > 0)
 			scale_up(rwb);
-		else if (rwb->scale_step < 0)
+		else if (rqd->scale_step < 0)
 			scale_down(rwb, false);
 		break;
 	default:
@@ -481,19 +391,50 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 	/*
 	 * Re-arm timer, if we have IO in flight
 	 */
-	if (rwb->scale_step || inflight)
+	if (rqd->scale_step || inflight)
 		rwb_arm_timer(rwb);
 }
 
-void wbt_update_limits(struct rq_wb *rwb)
+static void __wbt_update_limits(struct rq_wb *rwb)
 {
-	rwb->scale_step = 0;
-	rwb->scaled_max = false;
+	struct rq_depth *rqd = &rwb->rq_depth;
+
+	rqd->scale_step = 0;
+	rqd->scaled_max = false;
+
+	rq_depth_calc_max_depth(rqd);
 	calc_wb_limits(rwb);
 
 	rwb_wake_all(rwb);
 }
 
+void wbt_update_limits(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return;
+	__wbt_update_limits(RQWB(rqos));
+}
+
+u64 wbt_get_min_lat(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return 0;
+	return RQWB(rqos)->min_lat_nsec;
+}
+
+void wbt_set_min_lat(struct request_queue *q, u64 val)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return;
+	RQWB(rqos)->min_lat_nsec = val;
+	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
+	__wbt_update_limits(RQWB(rqos));
+}
+
+
 static bool close_io(struct rq_wb *rwb)
 {
 	const unsigned long now = jiffies;
@@ -520,7 +461,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	 * IO for a bit.
 	 */
 	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
-		limit = rwb->wb_max;
+		limit = rwb->rq_depth.max_depth;
 	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
 		/*
 		 * If less than 100ms since we completed unrelated IO,
@@ -554,7 +495,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
 	    rqw->wait.head.next != &wait->entry)
 		return false;
 
-	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
+	return rq_wait_inc_below(rqw, get_limit(rwb, rw));
 }
 
 /*
@@ -614,8 +555,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.
  */
-enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
+static enum wbt_flags wbt_wait(struct rq_qos *rqos, struct bio *bio,
+			       spinlock_t *lock)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	enum wbt_flags ret = 0;
 
 	if (!rwb_enabled(rwb))
@@ -643,8 +586,10 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 	return ret | WBT_TRACKED;
 }
 
-void wbt_issue(struct rq_wb *rwb, struct request *rq)
+void wbt_issue(struct rq_qos *rqos, struct request *rq)
 {
+	struct rq_wb *rwb = RQWB(rqos);
+
 	if (!rwb_enabled(rwb))
 		return;
 
@@ -661,8 +606,9 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq)
 	}
 }
 
-void wbt_requeue(struct rq_wb *rwb, struct request *rq)
+void wbt_requeue(struct rq_qos *rqos, struct request *rq)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	if (!rwb_enabled(rwb))
 		return;
 	if (rq == rwb->sync_cookie) {
@@ -671,39 +617,30 @@ void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 	}
 }
 
-void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
-	if (rwb) {
-		rwb->queue_depth = depth;
-		wbt_update_limits(rwb);
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (rqos) {
+		RQWB(rqos)->rq_depth.queue_depth = depth;
+		__wbt_update_limits(RQWB(rqos));
 	}
 }
 
-void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
-{
-	if (rwb)
-		rwb->wc = write_cache_on;
-}
-
-/*
- * Disable wbt, if enabled by default.
- */
-void wbt_disable_default(struct request_queue *q)
+void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
 {
-	struct rq_wb *rwb = q->rq_wb;
-
-	if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT)
-		wbt_exit(q);
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (rqos)
+		RQWB(rqos)->wc = write_cache_on;
 }
-EXPORT_SYMBOL_GPL(wbt_disable_default);
 
 /*
  * Enable wbt if defaults are configured that way
  */
 void wbt_enable_default(struct request_queue *q)
 {
+	struct rq_qos *rqos = wbt_rq_qos(q);
 	/* Throttling already enabled? */
-	if (q->rq_wb)
+	if (rqos)
 		return;
 
 	/* Queue not registered? Maybe shutting down... */
@@ -741,6 +678,41 @@ static int wbt_data_dir(const struct request *rq)
 	return -1;
 }
 
+static void wbt_exit(struct rq_qos *rqos)
+{
+	struct rq_wb *rwb = RQWB(rqos);
+	struct request_queue *q = rqos->q;
+
+	blk_stat_remove_callback(q, rwb->cb);
+	blk_stat_free_callback(rwb->cb);
+	kfree(rwb);
+}
+
+/*
+ * Disable wbt, if enabled by default.
+ */
+void wbt_disable_default(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	struct rq_wb *rwb;
+	if (!rqos)
+		return;
+	rwb = RQWB(rqos);
+	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
+		rwb->wb_normal = 0;
+}
+EXPORT_SYMBOL_GPL(wbt_disable_default);
+
+
+static struct rq_qos_ops wbt_rqos_ops = {
+	.throttle = wbt_wait,
+	.issue = wbt_issue,
+	.requeue = wbt_requeue,
+	.done = wbt_done,
+	.cleanup = __wbt_done,
+	.exit = wbt_exit,
+};
+
 int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
@@ -756,39 +728,29 @@ int wbt_init(struct request_queue *q)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < WBT_NUM_RWQ; i++) {
-		atomic_set(&rwb->rq_wait[i].inflight, 0);
-		init_waitqueue_head(&rwb->rq_wait[i].wait);
-	}
+	for (i = 0; i < WBT_NUM_RWQ; i++)
+		rq_wait_init(&rwb->rq_wait[i]);
 
+	rwb->rqos.id = RQ_QOS_WBT;
+	rwb->rqos.ops = &wbt_rqos_ops;
+	rwb->rqos.q = q;
 	rwb->last_comp = rwb->last_issue = jiffies;
-	rwb->queue = q;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
-	wbt_update_limits(rwb);
+	rwb->wc = 1;
+	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
+	__wbt_update_limits(rwb);
 
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	q->rq_wb = rwb;
+	rq_qos_add(q, &rwb->rqos);
 	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
 
-	wbt_set_queue_depth(rwb, blk_queue_depth(q));
-	wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+	wbt_set_queue_depth(q, blk_queue_depth(q));
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 
 	return 0;
 }
-
-void wbt_exit(struct request_queue *q)
-{
-	struct rq_wb *rwb = q->rq_wb;
-
-	if (rwb) {
-		blk_stat_remove_callback(q, rwb->cb);
-		blk_stat_free_callback(rwb->cb);
-		q->rq_wb = NULL;
-		kfree(rwb);
-	}
-}
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 300df531d0a6..53b20a58c0a2 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -9,6 +9,7 @@
 #include <linux/ktime.h>
 
 #include "blk-stat.h"
+#include "blk-rq-qos.h"
 
 enum wbt_flags {
 	WBT_TRACKED = 1,	/* write, tracked for throttling */
@@ -35,20 +36,12 @@ enum {
 	WBT_STATE_ON_MANUAL = 2,
 };
 
-struct rq_wait {
-	wait_queue_head_t wait;
-	atomic_t inflight;
-};
-
 struct rq_wb {
 	/*
 	 * Settings that govern how we throttle
 	 */
 	unsigned int wb_background;		/* background writeback */
 	unsigned int wb_normal;			/* normal writeback */
-	unsigned int wb_max;			/* max throughput writeback */
-	int scale_step;
-	bool scaled_max;
 
 	short enable_state;			/* WBT_STATE_* */
 
@@ -67,15 +60,20 @@ struct rq_wb {
 	void *sync_cookie;
 
 	unsigned int wc;
-	unsigned int queue_depth;
 
 	unsigned long last_issue;		/* last non-throttled issue */
 	unsigned long last_comp;		/* last non-throttled comp */
 	unsigned long min_lat_nsec;
-	struct request_queue *queue;
+	struct rq_qos rqos;
 	struct rq_wait rq_wait[WBT_NUM_RWQ];
+	struct rq_depth rq_depth;
 };
 
+static inline struct rq_wb *RQWB(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct rq_wb, rqos);
+}
+
 static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 {
 	unsigned int i, ret = 0;
@@ -86,6 +84,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 	return ret;
 }
 
+
 #ifdef CONFIG_BLK_WBT
 
 static inline void wbt_track(struct request *rq, enum wbt_flags flags)
@@ -93,19 +92,16 @@ static inline void wbt_track(struct request *rq, enum wbt_flags flags)
 	rq->wbt_flags |= flags;
 }
 
-void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct request *);
-enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
 int wbt_init(struct request_queue *);
-void wbt_exit(struct request_queue *);
-void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct request *);
-void wbt_issue(struct rq_wb *, struct request *);
+void wbt_update_limits(struct request_queue *);
 void wbt_disable_default(struct request_queue *);
 void wbt_enable_default(struct request_queue *);
 
-void wbt_set_queue_depth(struct rq_wb *, unsigned int);
-void wbt_set_write_cache(struct rq_wb *, bool);
+u64 wbt_get_min_lat(struct request_queue *q);
+void wbt_set_min_lat(struct request_queue *q, u64 val);
+
+void wbt_set_queue_depth(struct request_queue *, unsigned int);
+void wbt_set_write_cache(struct request_queue *, bool);
 
 u64 wbt_default_latency_nsec(struct request_queue *);
 
@@ -114,43 +110,30 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 static inline void wbt_track(struct request *rq, enum wbt_flags flags)
 {
 }
-static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
-{
-}
-static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
-{
-}
-static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
-				      spinlock_t *lock)
-{
-	return 0;
-}
 static inline int wbt_init(struct request_queue *q)
 {
 	return -EINVAL;
 }
-static inline void wbt_exit(struct request_queue *q)
-{
-}
-static inline void wbt_update_limits(struct rq_wb *rwb)
+static inline void wbt_update_limits(struct request_queue *q)
 {
 }
-static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
+static inline void wbt_disable_default(struct request_queue *q)
 {
 }
-static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
+static inline void wbt_enable_default(struct request_queue *q)
 {
 }
-static inline void wbt_disable_default(struct request_queue *q)
+static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 }
-static inline void wbt_enable_default(struct request_queue *q)
+static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
 {
 }
-static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+static inline u64 wbt_get_min_lat(struct request_queue *q)
 {
+	return 0;
 }
-static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc)
+static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
 {
 }
 static inline u64 wbt_default_latency_nsec(struct request_queue *q)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9d05646d5059..137759862f07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -42,7 +42,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
-struct rq_wb;
+struct rq_qos;
 struct blk_queue_stats;
 struct blk_stat_callback;
 
@@ -443,7 +443,7 @@ struct request_queue {
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
 	struct blk_queue_stats	*stats;
-	struct rq_wb		*rq_wb;
+	struct rq_qos		*rq_qos;
 
 	/*
 	 * If blkcg is not used, @q->root_rl serves all requests. If blkcg