author     Josef Bacik <jbacik@fb.com>     2018-07-03 11:32:35 -0400
committer  Jens Axboe <axboe@kernel.dk>    2018-07-09 11:07:54 -0400
commit     a79050434b45959f397042080fd1d70ffa9bd9df
tree       d5689153d497925d326a8b7e9963f4c3f88685ea
parent     2ecbf456352d0699f51b4c6d70ea5bf29766579c
blk-rq-qos: refactor out common elements of blk-wbt
blkcg-qos is going to do essentially what wbt does, only on a cgroup
basis. Break out the common code that will be shared between blkcg-qos
and wbt into blk-rq-qos.* so they can both utilize the same
infrastructure.
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
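The refactor boils down to a small piece of plumbing: every QoS policy hangs a struct rq_qos carrying an ops table off a per-queue singly linked list, and the block layer walks that list and invokes whichever hooks a policy implements. Below is a minimal, self-contained userspace sketch of that dispatch pattern; the names mirror the kernel ones, but it is an illustration rather than the in-tree code.

```c
/*
 * Simplified userspace model of the rq_qos dispatch pattern introduced
 * by this patch: policies register an ops table on a per-queue list and
 * the queue fans each event out to every registered policy. Names
 * mirror the kernel's, but this is a sketch, not the in-tree code.
 */
#include <stdio.h>
#include <stddef.h>

struct rq_qos;

struct rq_qos_ops {
	void (*issue)(struct rq_qos *rqos, int rq);
	void (*done)(struct rq_qos *rqos, int rq);
};

struct rq_qos {
	const struct rq_qos_ops *ops;
	struct rq_qos *next;
};

struct request_queue {
	struct rq_qos *rq_qos;		/* head of the policy chain */
};

/* Mirrors rq_qos_add(): push a policy onto the front of the chain. */
static void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
{
	rqos->next = q->rq_qos;
	q->rq_qos = rqos;
}

/* Mirrors rq_qos_issue(): call each policy's hook, if it provides one. */
static void rq_qos_issue(struct request_queue *q, int rq)
{
	struct rq_qos *rqos;

	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
		if (rqos->ops->issue)
			rqos->ops->issue(rqos, rq);
	}
}

/* A toy "wbt" policy that only cares about the issue hook. */
static void wbt_issue(struct rq_qos *rqos, int rq)
{
	(void)rqos;
	printf("wbt saw issue of request %d\n", rq);
}

static const struct rq_qos_ops wbt_ops = { .issue = wbt_issue };

int main(void)
{
	struct request_queue q = { NULL };
	struct rq_qos wbt = { .ops = &wbt_ops };

	rq_qos_add(&q, &wbt);
	rq_qos_issue(&q, 42);	/* dispatched to every registered policy */
	return 0;
}
```

With that shape in place, the planned blkcg-qos policy only has to supply its own ops table and register it on the same per-queue list.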
-rw-r--r--   block/Makefile            |   2
-rw-r--r--   block/blk-core.c          |  12
-rw-r--r--   block/blk-mq.c            |  12
-rw-r--r--   block/blk-rq-qos.c        | 178
-rw-r--r--   block/blk-rq-qos.h        | 106
-rw-r--r--   block/blk-settings.c      |   4
-rw-r--r--   block/blk-sysfs.c         |  22
-rw-r--r--   block/blk-wbt.c           | 326
-rw-r--r--   block/blk-wbt.h           |  63
-rw-r--r--   include/linux/blkdev.h    |   4
10 files changed, 478 insertions, 251 deletions
diff --git a/block/Makefile b/block/Makefile
index a8f94cdb75c3..57d0f47ab05f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | |||
9 | blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ | 9 | blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ |
10 | blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ | 10 | blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ |
11 | genhd.o partition-generic.o ioprio.o \ | 11 | genhd.o partition-generic.o ioprio.o \ |
12 | badblocks.o partitions/ | 12 | badblocks.o partitions/ blk-rq-qos.o |
13 | 13 | ||
14 | obj-$(CONFIG_BOUNCE) += bounce.o | 14 | obj-$(CONFIG_BOUNCE) += bounce.o |
15 | obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o | 15 | obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o |
diff --git a/block/blk-core.c b/block/blk-core.c
index 2ff8e131a892..b33a73bcf2d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1645,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) | |||
1645 | blk_delete_timer(rq); | 1645 | blk_delete_timer(rq); |
1646 | blk_clear_rq_complete(rq); | 1646 | blk_clear_rq_complete(rq); |
1647 | trace_block_rq_requeue(q, rq); | 1647 | trace_block_rq_requeue(q, rq); |
1648 | wbt_requeue(q->rq_wb, rq); | 1648 | rq_qos_requeue(q, rq); |
1649 | 1649 | ||
1650 | if (rq->rq_flags & RQF_QUEUED) | 1650 | if (rq->rq_flags & RQF_QUEUED) |
1651 | blk_queue_end_tag(q, rq); | 1651 | blk_queue_end_tag(q, rq); |
@@ -1752,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) | |||
1752 | /* this is a bio leak */ | 1752 | /* this is a bio leak */ |
1753 | WARN_ON(req->bio != NULL); | 1753 | WARN_ON(req->bio != NULL); |
1754 | 1754 | ||
1755 | wbt_done(q->rq_wb, req); | 1755 | rq_qos_done(q, req); |
1756 | 1756 | ||
1757 | /* | 1757 | /* |
1758 | * Request may not have originated from ll_rw_blk. if not, | 1758 | * Request may not have originated from ll_rw_blk. if not, |
@@ -2044,7 +2044,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) | |||
2044 | } | 2044 | } |
2045 | 2045 | ||
2046 | get_rq: | 2046 | get_rq: |
2047 | wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock); | 2047 | wb_acct = rq_qos_throttle(q, bio, q->queue_lock); |
2048 | 2048 | ||
2049 | /* | 2049 | /* |
2050 | * Grab a free request. This is might sleep but can not fail. | 2050 | * Grab a free request. This is might sleep but can not fail. |
@@ -2054,7 +2054,7 @@ get_rq: | |||
2054 | req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); | 2054 | req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); |
2055 | if (IS_ERR(req)) { | 2055 | if (IS_ERR(req)) { |
2056 | blk_queue_exit(q); | 2056 | blk_queue_exit(q); |
2057 | __wbt_done(q->rq_wb, wb_acct); | 2057 | rq_qos_cleanup(q, wb_acct); |
2058 | if (PTR_ERR(req) == -ENOMEM) | 2058 | if (PTR_ERR(req) == -ENOMEM) |
2059 | bio->bi_status = BLK_STS_RESOURCE; | 2059 | bio->bi_status = BLK_STS_RESOURCE; |
2060 | else | 2060 | else |
@@ -2983,7 +2983,7 @@ void blk_start_request(struct request *req) | |||
2983 | req->throtl_size = blk_rq_sectors(req); | 2983 | req->throtl_size = blk_rq_sectors(req); |
2984 | #endif | 2984 | #endif |
2985 | req->rq_flags |= RQF_STATS; | 2985 | req->rq_flags |= RQF_STATS; |
2986 | wbt_issue(req->q->rq_wb, req); | 2986 | rq_qos_issue(req->q, req); |
2987 | } | 2987 | } |
2988 | 2988 | ||
2989 | BUG_ON(blk_rq_is_complete(req)); | 2989 | BUG_ON(blk_rq_is_complete(req)); |
@@ -3207,7 +3207,7 @@ void blk_finish_request(struct request *req, blk_status_t error) | |||
3207 | blk_account_io_done(req, now); | 3207 | blk_account_io_done(req, now); |
3208 | 3208 | ||
3209 | if (req->end_io) { | 3209 | if (req->end_io) { |
3210 | wbt_done(req->q->rq_wb, req); | 3210 | rq_qos_done(q, req); |
3211 | req->end_io(req, error); | 3211 | req->end_io(req, error); |
3212 | } else { | 3212 | } else { |
3213 | if (blk_bidi_rq(req)) | 3213 | if (blk_bidi_rq(req)) |
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 850fdd02c385..ea2a226457fa 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -504,7 +504,7 @@ void blk_mq_free_request(struct request *rq) | |||
504 | if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) | 504 | if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) |
505 | laptop_io_completion(q->backing_dev_info); | 505 | laptop_io_completion(q->backing_dev_info); |
506 | 506 | ||
507 | wbt_done(q->rq_wb, rq); | 507 | rq_qos_done(q, rq); |
508 | 508 | ||
509 | if (blk_rq_rl(rq)) | 509 | if (blk_rq_rl(rq)) |
510 | blk_put_rl(blk_rq_rl(rq)); | 510 | blk_put_rl(blk_rq_rl(rq)); |
@@ -527,7 +527,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) | |||
527 | blk_account_io_done(rq, now); | 527 | blk_account_io_done(rq, now); |
528 | 528 | ||
529 | if (rq->end_io) { | 529 | if (rq->end_io) { |
530 | wbt_done(rq->q->rq_wb, rq); | 530 | rq_qos_done(rq->q, rq); |
531 | rq->end_io(rq, error); | 531 | rq->end_io(rq, error); |
532 | } else { | 532 | } else { |
533 | if (unlikely(blk_bidi_rq(rq))) | 533 | if (unlikely(blk_bidi_rq(rq))) |
@@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq) | |||
641 | rq->throtl_size = blk_rq_sectors(rq); | 641 | rq->throtl_size = blk_rq_sectors(rq); |
642 | #endif | 642 | #endif |
643 | rq->rq_flags |= RQF_STATS; | 643 | rq->rq_flags |= RQF_STATS; |
644 | wbt_issue(q->rq_wb, rq); | 644 | rq_qos_issue(q, rq); |
645 | } | 645 | } |
646 | 646 | ||
647 | WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); | 647 | WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); |
@@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq) | |||
667 | blk_mq_put_driver_tag(rq); | 667 | blk_mq_put_driver_tag(rq); |
668 | 668 | ||
669 | trace_block_rq_requeue(q, rq); | 669 | trace_block_rq_requeue(q, rq); |
670 | wbt_requeue(q->rq_wb, rq); | 670 | rq_qos_requeue(q, rq); |
671 | 671 | ||
672 | if (blk_mq_request_started(rq)) { | 672 | if (blk_mq_request_started(rq)) { |
673 | WRITE_ONCE(rq->state, MQ_RQ_IDLE); | 673 | WRITE_ONCE(rq->state, MQ_RQ_IDLE); |
@@ -1806,13 +1806,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1806 | if (blk_mq_sched_bio_merge(q, bio)) | 1806 | if (blk_mq_sched_bio_merge(q, bio)) |
1807 | return BLK_QC_T_NONE; | 1807 | return BLK_QC_T_NONE; |
1808 | 1808 | ||
1809 | wb_acct = wbt_wait(q->rq_wb, bio, NULL); | 1809 | wb_acct = rq_qos_throttle(q, bio, NULL); |
1810 | 1810 | ||
1811 | trace_block_getrq(q, bio, bio->bi_opf); | 1811 | trace_block_getrq(q, bio, bio->bi_opf); |
1812 | 1812 | ||
1813 | rq = blk_mq_get_request(q, bio, bio->bi_opf, &data); | 1813 | rq = blk_mq_get_request(q, bio, bio->bi_opf, &data); |
1814 | if (unlikely(!rq)) { | 1814 | if (unlikely(!rq)) { |
1815 | __wbt_done(q->rq_wb, wb_acct); | 1815 | rq_qos_cleanup(q, wb_acct); |
1816 | if (bio->bi_opf & REQ_NOWAIT) | 1816 | if (bio->bi_opf & REQ_NOWAIT) |
1817 | bio_wouldblock_error(bio); | 1817 | bio_wouldblock_error(bio); |
1818 | return BLK_QC_T_NONE; | 1818 | return BLK_QC_T_NONE; |
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
new file mode 100644
index 000000000000..d2f2af8aa10c
--- /dev/null
+++ b/block/blk-rq-qos.c
@@ -0,0 +1,178 @@ | |||
1 | #include "blk-rq-qos.h" | ||
2 | |||
3 | #include "blk-wbt.h" | ||
4 | |||
5 | /* | ||
6 | * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, | ||
7 | * false if 'v' + 1 would be bigger than 'below'. | ||
8 | */ | ||
9 | static bool atomic_inc_below(atomic_t *v, int below) | ||
10 | { | ||
11 | int cur = atomic_read(v); | ||
12 | |||
13 | for (;;) { | ||
14 | int old; | ||
15 | |||
16 | if (cur >= below) | ||
17 | return false; | ||
18 | old = atomic_cmpxchg(v, cur, cur + 1); | ||
19 | if (old == cur) | ||
20 | break; | ||
21 | cur = old; | ||
22 | } | ||
23 | |||
24 | return true; | ||
25 | } | ||
26 | |||
27 | bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit) | ||
28 | { | ||
29 | return atomic_inc_below(&rq_wait->inflight, limit); | ||
30 | } | ||
31 | |||
32 | void rq_qos_cleanup(struct request_queue *q, enum wbt_flags wb_acct) | ||
33 | { | ||
34 | struct rq_qos *rqos; | ||
35 | |||
36 | for (rqos = q->rq_qos; rqos; rqos = rqos->next) { | ||
37 | if (rqos->ops->cleanup) | ||
38 | rqos->ops->cleanup(rqos, wb_acct); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | void rq_qos_done(struct request_queue *q, struct request *rq) | ||
43 | { | ||
44 | struct rq_qos *rqos; | ||
45 | |||
46 | for (rqos = q->rq_qos; rqos; rqos = rqos->next) { | ||
47 | if (rqos->ops->done) | ||
48 | rqos->ops->done(rqos, rq); | ||
49 | } | ||
50 | } | ||
51 | |||
52 | void rq_qos_issue(struct request_queue *q, struct request *rq) | ||
53 | { | ||
54 | struct rq_qos *rqos; | ||
55 | |||
56 | for(rqos = q->rq_qos; rqos; rqos = rqos->next) { | ||
57 | if (rqos->ops->issue) | ||
58 | rqos->ops->issue(rqos, rq); | ||
59 | } | ||
60 | } | ||
61 | |||
62 | void rq_qos_requeue(struct request_queue *q, struct request *rq) | ||
63 | { | ||
64 | struct rq_qos *rqos; | ||
65 | |||
66 | for(rqos = q->rq_qos; rqos; rqos = rqos->next) { | ||
67 | if (rqos->ops->requeue) | ||
68 | rqos->ops->requeue(rqos, rq); | ||
69 | } | ||
70 | } | ||
71 | |||
72 | enum wbt_flags rq_qos_throttle(struct request_queue *q, struct bio *bio, | ||
73 | spinlock_t *lock) | ||
74 | { | ||
75 | struct rq_qos *rqos; | ||
76 | enum wbt_flags flags = 0; | ||
77 | |||
78 | for(rqos = q->rq_qos; rqos; rqos = rqos->next) { | ||
79 | if (rqos->ops->throttle) | ||
80 | flags |= rqos->ops->throttle(rqos, bio, lock); | ||
81 | } | ||
82 | return flags; | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * Return true, if we can't increase the depth further by scaling | ||
87 | */ | ||
88 | bool rq_depth_calc_max_depth(struct rq_depth *rqd) | ||
89 | { | ||
90 | unsigned int depth; | ||
91 | bool ret = false; | ||
92 | |||
93 | /* | ||
94 | * For QD=1 devices, this is a special case. It's important for those | ||
95 | * to have one request ready when one completes, so force a depth of | ||
96 | * 2 for those devices. On the backend, it'll be a depth of 1 anyway, | ||
97 | * since the device can't have more than that in flight. If we're | ||
98 | * scaling down, then keep a setting of 1/1/1. | ||
99 | */ | ||
100 | if (rqd->queue_depth == 1) { | ||
101 | if (rqd->scale_step > 0) | ||
102 | rqd->max_depth = 1; | ||
103 | else { | ||
104 | rqd->max_depth = 2; | ||
105 | ret = true; | ||
106 | } | ||
107 | } else { | ||
108 | /* | ||
109 | * scale_step == 0 is our default state. If we have suffered | ||
110 | * latency spikes, step will be > 0, and we shrink the | ||
111 | * allowed write depths. If step is < 0, we're only doing | ||
112 | * writes, and we allow a temporarily higher depth to | ||
113 | * increase performance. | ||
114 | */ | ||
115 | depth = min_t(unsigned int, rqd->default_depth, | ||
116 | rqd->queue_depth); | ||
117 | if (rqd->scale_step > 0) | ||
118 | depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); | ||
119 | else if (rqd->scale_step < 0) { | ||
120 | unsigned int maxd = 3 * rqd->queue_depth / 4; | ||
121 | |||
122 | depth = 1 + ((depth - 1) << -rqd->scale_step); | ||
123 | if (depth > maxd) { | ||
124 | depth = maxd; | ||
125 | ret = true; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | rqd->max_depth = depth; | ||
130 | } | ||
131 | |||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | void rq_depth_scale_up(struct rq_depth *rqd) | ||
136 | { | ||
137 | /* | ||
138 | * Hit max in previous round, stop here | ||
139 | */ | ||
140 | if (rqd->scaled_max) | ||
141 | return; | ||
142 | |||
143 | rqd->scale_step--; | ||
144 | |||
145 | rqd->scaled_max = rq_depth_calc_max_depth(rqd); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we | ||
150 | * had a latency violation. | ||
151 | */ | ||
152 | void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) | ||
153 | { | ||
154 | /* | ||
155 | * Stop scaling down when we've hit the limit. This also prevents | ||
156 | * ->scale_step from going to crazy values, if the device can't | ||
157 | * keep up. | ||
158 | */ | ||
159 | if (rqd->max_depth == 1) | ||
160 | return; | ||
161 | |||
162 | if (rqd->scale_step < 0 && hard_throttle) | ||
163 | rqd->scale_step = 0; | ||
164 | else | ||
165 | rqd->scale_step++; | ||
166 | |||
167 | rqd->scaled_max = false; | ||
168 | rq_depth_calc_max_depth(rqd); | ||
169 | } | ||
170 | |||
171 | void rq_qos_exit(struct request_queue *q) | ||
172 | { | ||
173 | while (q->rq_qos) { | ||
174 | struct rq_qos *rqos = q->rq_qos; | ||
175 | q->rq_qos = rqos->next; | ||
176 | rqos->ops->exit(rqos); | ||
177 | } | ||
178 | } | ||
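rq_wait_inc_below() above wraps atomic_inc_below(), a lock-free "increment only while under the limit" primitive used by the inflight accounting to admit or reject a writer without taking a lock. Here is a compilable userspace sketch of the same idea using C11 atomics instead of the kernel's atomic_t; it is illustrative only.

```c
/*
 * Userspace sketch of the atomic_inc_below()/rq_wait_inc_below() idea:
 * increment a counter only while it stays under a limit, without locks.
 * Uses C11 atomics rather than the kernel's atomic_t; illustrative only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool inc_below(atomic_int *v, int below)
{
	int cur = atomic_load(v);

	while (cur < below) {
		/* On success cur + 1 is published; on failure cur is reloaded. */
		if (atomic_compare_exchange_weak(v, &cur, cur + 1))
			return true;
	}
	return false;	/* the increment would reach or exceed the limit */
}

int main(void)
{
	atomic_int inflight = 0;
	int admitted = 0;

	/* Try to admit six writers against an inflight limit of four. */
	for (int i = 0; i < 6; i++)
		admitted += inc_below(&inflight, 4);

	printf("admitted %d of 6\n", admitted);	/* prints: admitted 4 of 6 */
	return 0;
}
```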
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
new file mode 100644
index 000000000000..f9a39bd6ece3
--- /dev/null
+++ b/block/blk-rq-qos.h
@@ -0,0 +1,106 @@ | |||
1 | #ifndef RQ_QOS_H | ||
2 | #define RQ_QOS_H | ||
3 | |||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/blkdev.h> | ||
6 | #include <linux/blk_types.h> | ||
7 | #include <linux/atomic.h> | ||
8 | #include <linux/wait.h> | ||
9 | |||
10 | enum rq_qos_id { | ||
11 | RQ_QOS_WBT, | ||
12 | RQ_QOS_CGROUP, | ||
13 | }; | ||
14 | |||
15 | struct rq_wait { | ||
16 | wait_queue_head_t wait; | ||
17 | atomic_t inflight; | ||
18 | }; | ||
19 | |||
20 | struct rq_qos { | ||
21 | struct rq_qos_ops *ops; | ||
22 | struct request_queue *q; | ||
23 | enum rq_qos_id id; | ||
24 | struct rq_qos *next; | ||
25 | }; | ||
26 | |||
27 | struct rq_qos_ops { | ||
28 | enum wbt_flags (*throttle)(struct rq_qos *, struct bio *, | ||
29 | spinlock_t *); | ||
30 | void (*issue)(struct rq_qos *, struct request *); | ||
31 | void (*requeue)(struct rq_qos *, struct request *); | ||
32 | void (*done)(struct rq_qos *, struct request *); | ||
33 | void (*cleanup)(struct rq_qos *, enum wbt_flags); | ||
34 | void (*exit)(struct rq_qos *); | ||
35 | }; | ||
36 | |||
37 | struct rq_depth { | ||
38 | unsigned int max_depth; | ||
39 | |||
40 | int scale_step; | ||
41 | bool scaled_max; | ||
42 | |||
43 | unsigned int queue_depth; | ||
44 | unsigned int default_depth; | ||
45 | }; | ||
46 | |||
47 | static inline struct rq_qos *rq_qos_id(struct request_queue *q, | ||
48 | enum rq_qos_id id) | ||
49 | { | ||
50 | struct rq_qos *rqos; | ||
51 | for (rqos = q->rq_qos; rqos; rqos = rqos->next) { | ||
52 | if (rqos->id == id) | ||
53 | break; | ||
54 | } | ||
55 | return rqos; | ||
56 | } | ||
57 | |||
58 | static inline struct rq_qos *wbt_rq_qos(struct request_queue *q) | ||
59 | { | ||
60 | return rq_qos_id(q, RQ_QOS_WBT); | ||
61 | } | ||
62 | |||
63 | static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q) | ||
64 | { | ||
65 | return rq_qos_id(q, RQ_QOS_CGROUP); | ||
66 | } | ||
67 | |||
68 | static inline void rq_wait_init(struct rq_wait *rq_wait) | ||
69 | { | ||
70 | atomic_set(&rq_wait->inflight, 0); | ||
71 | init_waitqueue_head(&rq_wait->wait); | ||
72 | } | ||
73 | |||
74 | static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) | ||
75 | { | ||
76 | rqos->next = q->rq_qos; | ||
77 | q->rq_qos = rqos; | ||
78 | } | ||
79 | |||
80 | static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) | ||
81 | { | ||
82 | struct rq_qos *cur, *prev = NULL; | ||
83 | for (cur = q->rq_qos; cur; cur = cur->next) { | ||
84 | if (cur == rqos) { | ||
85 | if (prev) | ||
86 | prev->next = rqos->next; | ||
87 | else | ||
88 | q->rq_qos = cur; | ||
89 | break; | ||
90 | } | ||
91 | prev = cur; | ||
92 | } | ||
93 | } | ||
94 | |||
95 | bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit); | ||
96 | void rq_depth_scale_up(struct rq_depth *rqd); | ||
97 | void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); | ||
98 | bool rq_depth_calc_max_depth(struct rq_depth *rqd); | ||
99 | |||
100 | void rq_qos_cleanup(struct request_queue *, enum wbt_flags); | ||
101 | void rq_qos_done(struct request_queue *, struct request *); | ||
102 | void rq_qos_issue(struct request_queue *, struct request *); | ||
103 | void rq_qos_requeue(struct request_queue *, struct request *); | ||
104 | enum wbt_flags rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *); | ||
105 | void rq_qos_exit(struct request_queue *); | ||
106 | #endif | ||
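The struct rq_depth fields declared here feed rq_depth_calc_max_depth() in blk-rq-qos.c: a positive scale_step shrinks the allowed depth roughly geometrically, while a negative one grows it but is capped at three quarters of the device queue depth. The standalone sketch below reproduces that arithmetic for a queue_depth > 1 device; the depth values used are illustrative, not the kernel defaults.

```c
/*
 * Standalone illustration of the rq_depth_calc_max_depth() arithmetic
 * for a queue_depth > 1 device. The input values are illustrative and
 * are not the kernel's defaults.
 */
#include <stdio.h>

static unsigned int calc_max_depth(unsigned int default_depth,
				   unsigned int queue_depth, int scale_step)
{
	unsigned int depth = default_depth < queue_depth ?
				default_depth : queue_depth;

	if (scale_step > 0) {
		/* Shrink: halve the headroom once per step (shift capped at 31). */
		depth = 1 + ((depth - 1) >> (scale_step < 31 ? scale_step : 31));
	} else if (scale_step < 0) {
		unsigned int maxd = 3 * queue_depth / 4;

		/* Grow: double the headroom per step, but never past 3/4 of QD. */
		depth = 1 + ((depth - 1) << -scale_step);
		if (depth > maxd)
			depth = maxd;
	}
	return depth;
}

int main(void)
{
	for (int step = -2; step <= 3; step++)
		printf("scale_step %2d -> max_depth %u\n",
		       step, calc_max_depth(64, 128, step));
	return 0;
}
```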
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d1de71124656..053de87d1fda 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -875,7 +875,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); | |||
875 | void blk_set_queue_depth(struct request_queue *q, unsigned int depth) | 875 | void blk_set_queue_depth(struct request_queue *q, unsigned int depth) |
876 | { | 876 | { |
877 | q->queue_depth = depth; | 877 | q->queue_depth = depth; |
878 | wbt_set_queue_depth(q->rq_wb, depth); | 878 | wbt_set_queue_depth(q, depth); |
879 | } | 879 | } |
880 | EXPORT_SYMBOL(blk_set_queue_depth); | 880 | EXPORT_SYMBOL(blk_set_queue_depth); |
881 | 881 | ||
@@ -900,7 +900,7 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) | |||
900 | queue_flag_clear(QUEUE_FLAG_FUA, q); | 900 | queue_flag_clear(QUEUE_FLAG_FUA, q); |
901 | spin_unlock_irq(q->queue_lock); | 901 | spin_unlock_irq(q->queue_lock); |
902 | 902 | ||
903 | wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); | 903 | wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); |
904 | } | 904 | } |
905 | EXPORT_SYMBOL_GPL(blk_queue_write_cache); | 905 | EXPORT_SYMBOL_GPL(blk_queue_write_cache); |
906 | 906 | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 94987b1f69e1..49c29a5d06bb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -422,16 +422,16 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, | |||
422 | 422 | ||
423 | static ssize_t queue_wb_lat_show(struct request_queue *q, char *page) | 423 | static ssize_t queue_wb_lat_show(struct request_queue *q, char *page) |
424 | { | 424 | { |
425 | if (!q->rq_wb) | 425 | if (!wbt_rq_qos(q)) |
426 | return -EINVAL; | 426 | return -EINVAL; |
427 | 427 | ||
428 | return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000)); | 428 | return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); |
429 | } | 429 | } |
430 | 430 | ||
431 | static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, | 431 | static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, |
432 | size_t count) | 432 | size_t count) |
433 | { | 433 | { |
434 | struct rq_wb *rwb; | 434 | struct rq_qos *rqos; |
435 | ssize_t ret; | 435 | ssize_t ret; |
436 | s64 val; | 436 | s64 val; |
437 | 437 | ||
@@ -441,23 +441,21 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, | |||
441 | if (val < -1) | 441 | if (val < -1) |
442 | return -EINVAL; | 442 | return -EINVAL; |
443 | 443 | ||
444 | rwb = q->rq_wb; | 444 | rqos = wbt_rq_qos(q); |
445 | if (!rwb) { | 445 | if (!rqos) { |
446 | ret = wbt_init(q); | 446 | ret = wbt_init(q); |
447 | if (ret) | 447 | if (ret) |
448 | return ret; | 448 | return ret; |
449 | } | 449 | } |
450 | 450 | ||
451 | rwb = q->rq_wb; | ||
452 | if (val == -1) | 451 | if (val == -1) |
453 | rwb->min_lat_nsec = wbt_default_latency_nsec(q); | 452 | val = wbt_default_latency_nsec(q); |
454 | else if (val >= 0) | 453 | else if (val >= 0) |
455 | rwb->min_lat_nsec = val * 1000ULL; | 454 | val *= 1000ULL; |
456 | 455 | ||
457 | if (rwb->enable_state == WBT_STATE_ON_DEFAULT) | 456 | wbt_set_min_lat(q, val); |
458 | rwb->enable_state = WBT_STATE_ON_MANUAL; | ||
459 | 457 | ||
460 | wbt_update_limits(rwb); | 458 | wbt_update_limits(q); |
461 | return count; | 459 | return count; |
462 | } | 460 | } |
463 | 461 | ||
@@ -964,7 +962,7 @@ void blk_unregister_queue(struct gendisk *disk) | |||
964 | kobject_del(&q->kobj); | 962 | kobject_del(&q->kobj); |
965 | blk_trace_remove_sysfs(disk_to_dev(disk)); | 963 | blk_trace_remove_sysfs(disk_to_dev(disk)); |
966 | 964 | ||
967 | wbt_exit(q); | 965 | rq_qos_exit(q); |
968 | 966 | ||
969 | mutex_lock(&q->sysfs_lock); | 967 | mutex_lock(&q->sysfs_lock); |
970 | if (q->request_fn || (q->mq_ops && q->elevator)) | 968 | if (q->request_fn || (q->mq_ops && q->elevator)) |
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 4f89b28fa652..6fe20fb823e4 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/swap.h> | 25 | #include <linux/swap.h> |
26 | 26 | ||
27 | #include "blk-wbt.h" | 27 | #include "blk-wbt.h" |
28 | #include "blk-rq-qos.h" | ||
28 | 29 | ||
29 | #define CREATE_TRACE_POINTS | 30 | #define CREATE_TRACE_POINTS |
30 | #include <trace/events/wbt.h> | 31 | #include <trace/events/wbt.h> |
@@ -78,28 +79,6 @@ static inline bool rwb_enabled(struct rq_wb *rwb) | |||
78 | return rwb && rwb->wb_normal != 0; | 79 | return rwb && rwb->wb_normal != 0; |
79 | } | 80 | } |
80 | 81 | ||
81 | /* | ||
82 | * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, | ||
83 | * false if 'v' + 1 would be bigger than 'below'. | ||
84 | */ | ||
85 | static bool atomic_inc_below(atomic_t *v, int below) | ||
86 | { | ||
87 | int cur = atomic_read(v); | ||
88 | |||
89 | for (;;) { | ||
90 | int old; | ||
91 | |||
92 | if (cur >= below) | ||
93 | return false; | ||
94 | old = atomic_cmpxchg(v, cur, cur + 1); | ||
95 | if (old == cur) | ||
96 | break; | ||
97 | cur = old; | ||
98 | } | ||
99 | |||
100 | return true; | ||
101 | } | ||
102 | |||
103 | static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) | 82 | static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) |
104 | { | 83 | { |
105 | if (rwb_enabled(rwb)) { | 84 | if (rwb_enabled(rwb)) { |
@@ -116,7 +95,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) | |||
116 | */ | 95 | */ |
117 | static bool wb_recent_wait(struct rq_wb *rwb) | 96 | static bool wb_recent_wait(struct rq_wb *rwb) |
118 | { | 97 | { |
119 | struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb; | 98 | struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb; |
120 | 99 | ||
121 | return time_before(jiffies, wb->dirty_sleep + HZ); | 100 | return time_before(jiffies, wb->dirty_sleep + HZ); |
122 | } | 101 | } |
@@ -144,8 +123,9 @@ static void rwb_wake_all(struct rq_wb *rwb) | |||
144 | } | 123 | } |
145 | } | 124 | } |
146 | 125 | ||
147 | void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) | 126 | static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) |
148 | { | 127 | { |
128 | struct rq_wb *rwb = RQWB(rqos); | ||
149 | struct rq_wait *rqw; | 129 | struct rq_wait *rqw; |
150 | int inflight, limit; | 130 | int inflight, limit; |
151 | 131 | ||
@@ -194,10 +174,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) | |||
194 | * Called on completion of a request. Note that it's also called when | 174 | * Called on completion of a request. Note that it's also called when |
195 | * a request is merged, when the request gets freed. | 175 | * a request is merged, when the request gets freed. |
196 | */ | 176 | */ |
197 | void wbt_done(struct rq_wb *rwb, struct request *rq) | 177 | static void wbt_done(struct rq_qos *rqos, struct request *rq) |
198 | { | 178 | { |
199 | if (!rwb) | 179 | struct rq_wb *rwb = RQWB(rqos); |
200 | return; | ||
201 | 180 | ||
202 | if (!wbt_is_tracked(rq)) { | 181 | if (!wbt_is_tracked(rq)) { |
203 | if (rwb->sync_cookie == rq) { | 182 | if (rwb->sync_cookie == rq) { |
@@ -209,72 +188,11 @@ void wbt_done(struct rq_wb *rwb, struct request *rq) | |||
209 | wb_timestamp(rwb, &rwb->last_comp); | 188 | wb_timestamp(rwb, &rwb->last_comp); |
210 | } else { | 189 | } else { |
211 | WARN_ON_ONCE(rq == rwb->sync_cookie); | 190 | WARN_ON_ONCE(rq == rwb->sync_cookie); |
212 | __wbt_done(rwb, wbt_flags(rq)); | 191 | __wbt_done(rqos, wbt_flags(rq)); |
213 | } | 192 | } |
214 | wbt_clear_state(rq); | 193 | wbt_clear_state(rq); |
215 | } | 194 | } |
216 | 195 | ||
217 | /* | ||
218 | * Return true, if we can't increase the depth further by scaling | ||
219 | */ | ||
220 | static bool calc_wb_limits(struct rq_wb *rwb) | ||
221 | { | ||
222 | unsigned int depth; | ||
223 | bool ret = false; | ||
224 | |||
225 | if (!rwb->min_lat_nsec) { | ||
226 | rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0; | ||
227 | return false; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * For QD=1 devices, this is a special case. It's important for those | ||
232 | * to have one request ready when one completes, so force a depth of | ||
233 | * 2 for those devices. On the backend, it'll be a depth of 1 anyway, | ||
234 | * since the device can't have more than that in flight. If we're | ||
235 | * scaling down, then keep a setting of 1/1/1. | ||
236 | */ | ||
237 | if (rwb->queue_depth == 1) { | ||
238 | if (rwb->scale_step > 0) | ||
239 | rwb->wb_max = rwb->wb_normal = 1; | ||
240 | else { | ||
241 | rwb->wb_max = rwb->wb_normal = 2; | ||
242 | ret = true; | ||
243 | } | ||
244 | rwb->wb_background = 1; | ||
245 | } else { | ||
246 | /* | ||
247 | * scale_step == 0 is our default state. If we have suffered | ||
248 | * latency spikes, step will be > 0, and we shrink the | ||
249 | * allowed write depths. If step is < 0, we're only doing | ||
250 | * writes, and we allow a temporarily higher depth to | ||
251 | * increase performance. | ||
252 | */ | ||
253 | depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth); | ||
254 | if (rwb->scale_step > 0) | ||
255 | depth = 1 + ((depth - 1) >> min(31, rwb->scale_step)); | ||
256 | else if (rwb->scale_step < 0) { | ||
257 | unsigned int maxd = 3 * rwb->queue_depth / 4; | ||
258 | |||
259 | depth = 1 + ((depth - 1) << -rwb->scale_step); | ||
260 | if (depth > maxd) { | ||
261 | depth = maxd; | ||
262 | ret = true; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * Set our max/normal/bg queue depths based on how far | ||
268 | * we have scaled down (->scale_step). | ||
269 | */ | ||
270 | rwb->wb_max = depth; | ||
271 | rwb->wb_normal = (rwb->wb_max + 1) / 2; | ||
272 | rwb->wb_background = (rwb->wb_max + 3) / 4; | ||
273 | } | ||
274 | |||
275 | return ret; | ||
276 | } | ||
277 | |||
278 | static inline bool stat_sample_valid(struct blk_rq_stat *stat) | 196 | static inline bool stat_sample_valid(struct blk_rq_stat *stat) |
279 | { | 197 | { |
280 | /* | 198 | /* |
@@ -307,7 +225,8 @@ enum { | |||
307 | 225 | ||
308 | static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) | 226 | static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) |
309 | { | 227 | { |
310 | struct backing_dev_info *bdi = rwb->queue->backing_dev_info; | 228 | struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; |
229 | struct rq_depth *rqd = &rwb->rq_depth; | ||
311 | u64 thislat; | 230 | u64 thislat; |
312 | 231 | ||
313 | /* | 232 | /* |
@@ -351,7 +270,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) | |||
351 | return LAT_EXCEEDED; | 270 | return LAT_EXCEEDED; |
352 | } | 271 | } |
353 | 272 | ||
354 | if (rwb->scale_step) | 273 | if (rqd->scale_step) |
355 | trace_wbt_stat(bdi, stat); | 274 | trace_wbt_stat(bdi, stat); |
356 | 275 | ||
357 | return LAT_OK; | 276 | return LAT_OK; |
@@ -359,58 +278,48 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) | |||
359 | 278 | ||
360 | static void rwb_trace_step(struct rq_wb *rwb, const char *msg) | 279 | static void rwb_trace_step(struct rq_wb *rwb, const char *msg) |
361 | { | 280 | { |
362 | struct backing_dev_info *bdi = rwb->queue->backing_dev_info; | 281 | struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; |
282 | struct rq_depth *rqd = &rwb->rq_depth; | ||
363 | 283 | ||
364 | trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec, | 284 | trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, |
365 | rwb->wb_background, rwb->wb_normal, rwb->wb_max); | 285 | rwb->wb_background, rwb->wb_normal, rqd->max_depth); |
366 | } | 286 | } |
367 | 287 | ||
368 | static void scale_up(struct rq_wb *rwb) | 288 | static void calc_wb_limits(struct rq_wb *rwb) |
369 | { | 289 | { |
370 | /* | 290 | if (rwb->min_lat_nsec == 0) { |
371 | * Hit max in previous round, stop here | 291 | rwb->wb_normal = rwb->wb_background = 0; |
372 | */ | 292 | } else if (rwb->rq_depth.max_depth <= 2) { |
373 | if (rwb->scaled_max) | 293 | rwb->wb_normal = rwb->rq_depth.max_depth; |
374 | return; | 294 | rwb->wb_background = 1; |
295 | } else { | ||
296 | rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; | ||
297 | rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; | ||
298 | } | ||
299 | } | ||
375 | 300 | ||
376 | rwb->scale_step--; | 301 | static void scale_up(struct rq_wb *rwb) |
302 | { | ||
303 | rq_depth_scale_up(&rwb->rq_depth); | ||
304 | calc_wb_limits(rwb); | ||
377 | rwb->unknown_cnt = 0; | 305 | rwb->unknown_cnt = 0; |
378 | 306 | rwb_trace_step(rwb, "scale up"); | |
379 | rwb->scaled_max = calc_wb_limits(rwb); | ||
380 | |||
381 | rwb_wake_all(rwb); | ||
382 | |||
383 | rwb_trace_step(rwb, "step up"); | ||
384 | } | 307 | } |
385 | 308 | ||
386 | /* | ||
387 | * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we | ||
388 | * had a latency violation. | ||
389 | */ | ||
390 | static void scale_down(struct rq_wb *rwb, bool hard_throttle) | 309 | static void scale_down(struct rq_wb *rwb, bool hard_throttle) |
391 | { | 310 | { |
392 | /* | 311 | rq_depth_scale_down(&rwb->rq_depth, hard_throttle); |
393 | * Stop scaling down when we've hit the limit. This also prevents | ||
394 | * ->scale_step from going to crazy values, if the device can't | ||
395 | * keep up. | ||
396 | */ | ||
397 | if (rwb->wb_max == 1) | ||
398 | return; | ||
399 | |||
400 | if (rwb->scale_step < 0 && hard_throttle) | ||
401 | rwb->scale_step = 0; | ||
402 | else | ||
403 | rwb->scale_step++; | ||
404 | |||
405 | rwb->scaled_max = false; | ||
406 | rwb->unknown_cnt = 0; | ||
407 | calc_wb_limits(rwb); | 312 | calc_wb_limits(rwb); |
408 | rwb_trace_step(rwb, "step down"); | 313 | rwb->unknown_cnt = 0; |
314 | rwb_wake_all(rwb); | ||
315 | rwb_trace_step(rwb, "scale down"); | ||
409 | } | 316 | } |
410 | 317 | ||
411 | static void rwb_arm_timer(struct rq_wb *rwb) | 318 | static void rwb_arm_timer(struct rq_wb *rwb) |
412 | { | 319 | { |
413 | if (rwb->scale_step > 0) { | 320 | struct rq_depth *rqd = &rwb->rq_depth; |
321 | |||
322 | if (rqd->scale_step > 0) { | ||
414 | /* | 323 | /* |
415 | * We should speed this up, using some variant of a fast | 324 | * We should speed this up, using some variant of a fast |
416 | * integer inverse square root calculation. Since we only do | 325 | * integer inverse square root calculation. Since we only do |
@@ -418,7 +327,7 @@ static void rwb_arm_timer(struct rq_wb *rwb) | |||
418 | * though. | 327 | * though. |
419 | */ | 328 | */ |
420 | rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, | 329 | rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, |
421 | int_sqrt((rwb->scale_step + 1) << 8)); | 330 | int_sqrt((rqd->scale_step + 1) << 8)); |
422 | } else { | 331 | } else { |
423 | /* | 332 | /* |
424 | * For step < 0, we don't want to increase/decrease the | 333 | * For step < 0, we don't want to increase/decrease the |
@@ -433,12 +342,13 @@ static void rwb_arm_timer(struct rq_wb *rwb) | |||
433 | static void wb_timer_fn(struct blk_stat_callback *cb) | 342 | static void wb_timer_fn(struct blk_stat_callback *cb) |
434 | { | 343 | { |
435 | struct rq_wb *rwb = cb->data; | 344 | struct rq_wb *rwb = cb->data; |
345 | struct rq_depth *rqd = &rwb->rq_depth; | ||
436 | unsigned int inflight = wbt_inflight(rwb); | 346 | unsigned int inflight = wbt_inflight(rwb); |
437 | int status; | 347 | int status; |
438 | 348 | ||
439 | status = latency_exceeded(rwb, cb->stat); | 349 | status = latency_exceeded(rwb, cb->stat); |
440 | 350 | ||
441 | trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step, | 351 | trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step, |
442 | inflight); | 352 | inflight); |
443 | 353 | ||
444 | /* | 354 | /* |
@@ -469,9 +379,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb) | |||
469 | * currently don't have a valid read/write sample. For that | 379 | * currently don't have a valid read/write sample. For that |
470 | * case, slowly return to center state (step == 0). | 380 | * case, slowly return to center state (step == 0). |
471 | */ | 381 | */ |
472 | if (rwb->scale_step > 0) | 382 | if (rqd->scale_step > 0) |
473 | scale_up(rwb); | 383 | scale_up(rwb); |
474 | else if (rwb->scale_step < 0) | 384 | else if (rqd->scale_step < 0) |
475 | scale_down(rwb, false); | 385 | scale_down(rwb, false); |
476 | break; | 386 | break; |
477 | default: | 387 | default: |
@@ -481,19 +391,50 @@ static void wb_timer_fn(struct blk_stat_callback *cb) | |||
481 | /* | 391 | /* |
482 | * Re-arm timer, if we have IO in flight | 392 | * Re-arm timer, if we have IO in flight |
483 | */ | 393 | */ |
484 | if (rwb->scale_step || inflight) | 394 | if (rqd->scale_step || inflight) |
485 | rwb_arm_timer(rwb); | 395 | rwb_arm_timer(rwb); |
486 | } | 396 | } |
487 | 397 | ||
488 | void wbt_update_limits(struct rq_wb *rwb) | 398 | static void __wbt_update_limits(struct rq_wb *rwb) |
489 | { | 399 | { |
490 | rwb->scale_step = 0; | 400 | struct rq_depth *rqd = &rwb->rq_depth; |
491 | rwb->scaled_max = false; | 401 | |
402 | rqd->scale_step = 0; | ||
403 | rqd->scaled_max = false; | ||
404 | |||
405 | rq_depth_calc_max_depth(rqd); | ||
492 | calc_wb_limits(rwb); | 406 | calc_wb_limits(rwb); |
493 | 407 | ||
494 | rwb_wake_all(rwb); | 408 | rwb_wake_all(rwb); |
495 | } | 409 | } |
496 | 410 | ||
411 | void wbt_update_limits(struct request_queue *q) | ||
412 | { | ||
413 | struct rq_qos *rqos = wbt_rq_qos(q); | ||
414 | if (!rqos) | ||
415 | return; | ||
416 | __wbt_update_limits(RQWB(rqos)); | ||
417 | } | ||
418 | |||
419 | u64 wbt_get_min_lat(struct request_queue *q) | ||
420 | { | ||
421 | struct rq_qos *rqos = wbt_rq_qos(q); | ||
422 | if (!rqos) | ||
423 | return 0; | ||
424 | return RQWB(rqos)->min_lat_nsec; | ||
425 | } | ||
426 | |||
427 | void wbt_set_min_lat(struct request_queue *q, u64 val) | ||
428 | { | ||
429 | struct rq_qos *rqos = wbt_rq_qos(q); | ||
430 | if (!rqos) | ||
431 | return; | ||
432 | RQWB(rqos)->min_lat_nsec = val; | ||
433 | RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; | ||
434 | __wbt_update_limits(RQWB(rqos)); | ||
435 | } | ||
436 | |||
437 | |||
497 | static bool close_io(struct rq_wb *rwb) | 438 | static bool close_io(struct rq_wb *rwb) |
498 | { | 439 | { |
499 | const unsigned long now = jiffies; | 440 | const unsigned long now = jiffies; |
@@ -520,7 +461,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) | |||
520 | * IO for a bit. | 461 | * IO for a bit. |
521 | */ | 462 | */ |
522 | if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) | 463 | if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) |
523 | limit = rwb->wb_max; | 464 | limit = rwb->rq_depth.max_depth; |
524 | else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { | 465 | else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { |
525 | /* | 466 | /* |
526 | * If less than 100ms since we completed unrelated IO, | 467 | * If less than 100ms since we completed unrelated IO, |
@@ -554,7 +495,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, | |||
554 | rqw->wait.head.next != &wait->entry) | 495 | rqw->wait.head.next != &wait->entry) |
555 | return false; | 496 | return false; |
556 | 497 | ||
557 | return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)); | 498 | return rq_wait_inc_below(rqw, get_limit(rwb, rw)); |
558 | } | 499 | } |
559 | 500 | ||
560 | /* | 501 | /* |
@@ -614,8 +555,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) | |||
614 | * in an irq held spinlock, if it holds one when calling this function. | 555 | * in an irq held spinlock, if it holds one when calling this function. |
615 | * If we do sleep, we'll release and re-grab it. | 556 | * If we do sleep, we'll release and re-grab it. |
616 | */ | 557 | */ |
617 | enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) | 558 | static enum wbt_flags wbt_wait(struct rq_qos *rqos, struct bio *bio, |
559 | spinlock_t *lock) | ||
618 | { | 560 | { |
561 | struct rq_wb *rwb = RQWB(rqos); | ||
619 | enum wbt_flags ret = 0; | 562 | enum wbt_flags ret = 0; |
620 | 563 | ||
621 | if (!rwb_enabled(rwb)) | 564 | if (!rwb_enabled(rwb)) |
@@ -643,8 +586,10 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) | |||
643 | return ret | WBT_TRACKED; | 586 | return ret | WBT_TRACKED; |
644 | } | 587 | } |
645 | 588 | ||
646 | void wbt_issue(struct rq_wb *rwb, struct request *rq) | 589 | void wbt_issue(struct rq_qos *rqos, struct request *rq) |
647 | { | 590 | { |
591 | struct rq_wb *rwb = RQWB(rqos); | ||
592 | |||
648 | if (!rwb_enabled(rwb)) | 593 | if (!rwb_enabled(rwb)) |
649 | return; | 594 | return; |
650 | 595 | ||
@@ -661,8 +606,9 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq) | |||
661 | } | 606 | } |
662 | } | 607 | } |
663 | 608 | ||
664 | void wbt_requeue(struct rq_wb *rwb, struct request *rq) | 609 | void wbt_requeue(struct rq_qos *rqos, struct request *rq) |
665 | { | 610 | { |
611 | struct rq_wb *rwb = RQWB(rqos); | ||
666 | if (!rwb_enabled(rwb)) | 612 | if (!rwb_enabled(rwb)) |
667 | return; | 613 | return; |
668 | if (rq == rwb->sync_cookie) { | 614 | if (rq == rwb->sync_cookie) { |
@@ -671,39 +617,30 @@ void wbt_requeue(struct rq_wb *rwb, struct request *rq) | |||
671 | } | 617 | } |
672 | } | 618 | } |
673 | 619 | ||
674 | void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) | 620 | void wbt_set_queue_depth(struct request_queue *q, unsigned int depth) |
675 | { | 621 | { |
676 | if (rwb) { | 622 | struct rq_qos *rqos = wbt_rq_qos(q); |
677 | rwb->queue_depth = depth; | 623 | if (rqos) { |
678 | wbt_update_limits(rwb); | 624 | RQWB(rqos)->rq_depth.queue_depth = depth; |
625 | __wbt_update_limits(RQWB(rqos)); | ||
679 | } | 626 | } |
680 | } | 627 | } |
681 | 628 | ||
682 | void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on) | 629 | void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) |
683 | { | ||
684 | if (rwb) | ||
685 | rwb->wc = write_cache_on; | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * Disable wbt, if enabled by default. | ||
690 | */ | ||
691 | void wbt_disable_default(struct request_queue *q) | ||
692 | { | 630 | { |
693 | struct rq_wb *rwb = q->rq_wb; | 631 | struct rq_qos *rqos = wbt_rq_qos(q); |
694 | 632 | if (rqos) | |
695 | if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT) | 633 | RQWB(rqos)->wc = write_cache_on; |
696 | wbt_exit(q); | ||
697 | } | 634 | } |
698 | EXPORT_SYMBOL_GPL(wbt_disable_default); | ||
699 | 635 | ||
700 | /* | 636 | /* |
701 | * Enable wbt if defaults are configured that way | 637 | * Enable wbt if defaults are configured that way |
702 | */ | 638 | */ |
703 | void wbt_enable_default(struct request_queue *q) | 639 | void wbt_enable_default(struct request_queue *q) |
704 | { | 640 | { |
641 | struct rq_qos *rqos = wbt_rq_qos(q); | ||
705 | /* Throttling already enabled? */ | 642 | /* Throttling already enabled? */ |
706 | if (q->rq_wb) | 643 | if (rqos) |
707 | return; | 644 | return; |
708 | 645 | ||
709 | /* Queue not registered? Maybe shutting down... */ | 646 | /* Queue not registered? Maybe shutting down... */ |
@@ -741,6 +678,41 @@ static int wbt_data_dir(const struct request *rq) | |||
741 | return -1; | 678 | return -1; |
742 | } | 679 | } |
743 | 680 | ||
681 | static void wbt_exit(struct rq_qos *rqos) | ||
682 | { | ||
683 | struct rq_wb *rwb = RQWB(rqos); | ||
684 | struct request_queue *q = rqos->q; | ||
685 | |||
686 | blk_stat_remove_callback(q, rwb->cb); | ||
687 | blk_stat_free_callback(rwb->cb); | ||
688 | kfree(rwb); | ||
689 | } | ||
690 | |||
691 | /* | ||
692 | * Disable wbt, if enabled by default. | ||
693 | */ | ||
694 | void wbt_disable_default(struct request_queue *q) | ||
695 | { | ||
696 | struct rq_qos *rqos = wbt_rq_qos(q); | ||
697 | struct rq_wb *rwb; | ||
698 | if (!rqos) | ||
699 | return; | ||
700 | rwb = RQWB(rqos); | ||
701 | if (rwb->enable_state == WBT_STATE_ON_DEFAULT) | ||
702 | rwb->wb_normal = 0; | ||
703 | } | ||
704 | EXPORT_SYMBOL_GPL(wbt_disable_default); | ||
705 | |||
706 | |||
707 | static struct rq_qos_ops wbt_rqos_ops = { | ||
708 | .throttle = wbt_wait, | ||
709 | .issue = wbt_issue, | ||
710 | .requeue = wbt_requeue, | ||
711 | .done = wbt_done, | ||
712 | .cleanup = __wbt_done, | ||
713 | .exit = wbt_exit, | ||
714 | }; | ||
715 | |||
744 | int wbt_init(struct request_queue *q) | 716 | int wbt_init(struct request_queue *q) |
745 | { | 717 | { |
746 | struct rq_wb *rwb; | 718 | struct rq_wb *rwb; |
@@ -756,39 +728,29 @@ int wbt_init(struct request_queue *q) | |||
756 | return -ENOMEM; | 728 | return -ENOMEM; |
757 | } | 729 | } |
758 | 730 | ||
759 | for (i = 0; i < WBT_NUM_RWQ; i++) { | 731 | for (i = 0; i < WBT_NUM_RWQ; i++) |
760 | atomic_set(&rwb->rq_wait[i].inflight, 0); | 732 | rq_wait_init(&rwb->rq_wait[i]); |
761 | init_waitqueue_head(&rwb->rq_wait[i].wait); | ||
762 | } | ||
763 | 733 | ||
734 | rwb->rqos.id = RQ_QOS_WBT; | ||
735 | rwb->rqos.ops = &wbt_rqos_ops; | ||
736 | rwb->rqos.q = q; | ||
764 | rwb->last_comp = rwb->last_issue = jiffies; | 737 | rwb->last_comp = rwb->last_issue = jiffies; |
765 | rwb->queue = q; | ||
766 | rwb->win_nsec = RWB_WINDOW_NSEC; | 738 | rwb->win_nsec = RWB_WINDOW_NSEC; |
767 | rwb->enable_state = WBT_STATE_ON_DEFAULT; | 739 | rwb->enable_state = WBT_STATE_ON_DEFAULT; |
768 | wbt_update_limits(rwb); | 740 | rwb->wc = 1; |
741 | rwb->rq_depth.default_depth = RWB_DEF_DEPTH; | ||
742 | __wbt_update_limits(rwb); | ||
769 | 743 | ||
770 | /* | 744 | /* |
771 | * Assign rwb and add the stats callback. | 745 | * Assign rwb and add the stats callback. |
772 | */ | 746 | */ |
773 | q->rq_wb = rwb; | 747 | rq_qos_add(q, &rwb->rqos); |
774 | blk_stat_add_callback(q, rwb->cb); | 748 | blk_stat_add_callback(q, rwb->cb); |
775 | 749 | ||
776 | rwb->min_lat_nsec = wbt_default_latency_nsec(q); | 750 | rwb->min_lat_nsec = wbt_default_latency_nsec(q); |
777 | 751 | ||
778 | wbt_set_queue_depth(rwb, blk_queue_depth(q)); | 752 | wbt_set_queue_depth(q, blk_queue_depth(q)); |
779 | wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); | 753 | wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); |
780 | 754 | ||
781 | return 0; | 755 | return 0; |
782 | } | 756 | } |
783 | |||
784 | void wbt_exit(struct request_queue *q) | ||
785 | { | ||
786 | struct rq_wb *rwb = q->rq_wb; | ||
787 | |||
788 | if (rwb) { | ||
789 | blk_stat_remove_callback(q, rwb->cb); | ||
790 | blk_stat_free_callback(rwb->cb); | ||
791 | q->rq_wb = NULL; | ||
792 | kfree(rwb); | ||
793 | } | ||
794 | } | ||
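With the scaling state moved into struct rq_depth, wbt's rewritten calc_wb_limits() above only has to derive the normal and background write-back limits from max_depth. A standalone sketch of that split, with illustrative depths:

```c
/*
 * Standalone sketch of the max_depth -> (wb_normal, wb_background)
 * split performed by the rewritten calc_wb_limits(). Illustrative only.
 */
#include <stdio.h>

struct wb_limits {
	unsigned int normal;
	unsigned int background;
};

static struct wb_limits calc_wb_limits(unsigned int max_depth, int lat_enabled)
{
	struct wb_limits l;

	if (!lat_enabled) {
		/* min_lat_nsec == 0: throttling is effectively off */
		l.normal = l.background = 0;
	} else if (max_depth <= 2) {
		l.normal = max_depth;
		l.background = 1;
	} else {
		l.normal = (max_depth + 1) / 2;		/* half of max */
		l.background = (max_depth + 3) / 4;	/* quarter of max */
	}
	return l;
}

int main(void)
{
	unsigned int depths[] = { 1, 2, 16, 64 };

	for (unsigned int i = 0; i < sizeof(depths) / sizeof(depths[0]); i++) {
		struct wb_limits l = calc_wb_limits(depths[i], 1);
		printf("max %2u -> normal %2u, background %2u\n",
		       depths[i], l.normal, l.background);
	}
	return 0;
}
```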
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 300df531d0a6..53b20a58c0a2 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/ktime.h> | 9 | #include <linux/ktime.h> |
10 | 10 | ||
11 | #include "blk-stat.h" | 11 | #include "blk-stat.h" |
12 | #include "blk-rq-qos.h" | ||
12 | 13 | ||
13 | enum wbt_flags { | 14 | enum wbt_flags { |
14 | WBT_TRACKED = 1, /* write, tracked for throttling */ | 15 | WBT_TRACKED = 1, /* write, tracked for throttling */ |
@@ -35,20 +36,12 @@ enum { | |||
35 | WBT_STATE_ON_MANUAL = 2, | 36 | WBT_STATE_ON_MANUAL = 2, |
36 | }; | 37 | }; |
37 | 38 | ||
38 | struct rq_wait { | ||
39 | wait_queue_head_t wait; | ||
40 | atomic_t inflight; | ||
41 | }; | ||
42 | |||
43 | struct rq_wb { | 39 | struct rq_wb { |
44 | /* | 40 | /* |
45 | * Settings that govern how we throttle | 41 | * Settings that govern how we throttle |
46 | */ | 42 | */ |
47 | unsigned int wb_background; /* background writeback */ | 43 | unsigned int wb_background; /* background writeback */ |
48 | unsigned int wb_normal; /* normal writeback */ | 44 | unsigned int wb_normal; /* normal writeback */ |
49 | unsigned int wb_max; /* max throughput writeback */ | ||
50 | int scale_step; | ||
51 | bool scaled_max; | ||
52 | 45 | ||
53 | short enable_state; /* WBT_STATE_* */ | 46 | short enable_state; /* WBT_STATE_* */ |
54 | 47 | ||
@@ -67,15 +60,20 @@ struct rq_wb { | |||
67 | void *sync_cookie; | 60 | void *sync_cookie; |
68 | 61 | ||
69 | unsigned int wc; | 62 | unsigned int wc; |
70 | unsigned int queue_depth; | ||
71 | 63 | ||
72 | unsigned long last_issue; /* last non-throttled issue */ | 64 | unsigned long last_issue; /* last non-throttled issue */ |
73 | unsigned long last_comp; /* last non-throttled comp */ | 65 | unsigned long last_comp; /* last non-throttled comp */ |
74 | unsigned long min_lat_nsec; | 66 | unsigned long min_lat_nsec; |
75 | struct request_queue *queue; | 67 | struct rq_qos rqos; |
76 | struct rq_wait rq_wait[WBT_NUM_RWQ]; | 68 | struct rq_wait rq_wait[WBT_NUM_RWQ]; |
69 | struct rq_depth rq_depth; | ||
77 | }; | 70 | }; |
78 | 71 | ||
72 | static inline struct rq_wb *RQWB(struct rq_qos *rqos) | ||
73 | { | ||
74 | return container_of(rqos, struct rq_wb, rqos); | ||
75 | } | ||
76 | |||
79 | static inline unsigned int wbt_inflight(struct rq_wb *rwb) | 77 | static inline unsigned int wbt_inflight(struct rq_wb *rwb) |
80 | { | 78 | { |
81 | unsigned int i, ret = 0; | 79 | unsigned int i, ret = 0; |
@@ -86,6 +84,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb) | |||
86 | return ret; | 84 | return ret; |
87 | } | 85 | } |
88 | 86 | ||
87 | |||
89 | #ifdef CONFIG_BLK_WBT | 88 | #ifdef CONFIG_BLK_WBT |
90 | 89 | ||
91 | static inline void wbt_track(struct request *rq, enum wbt_flags flags) | 90 | static inline void wbt_track(struct request *rq, enum wbt_flags flags) |
@@ -93,19 +92,16 @@ static inline void wbt_track(struct request *rq, enum wbt_flags flags) | |||
93 | rq->wbt_flags |= flags; | 92 | rq->wbt_flags |= flags; |
94 | } | 93 | } |
95 | 94 | ||
96 | void __wbt_done(struct rq_wb *, enum wbt_flags); | ||
97 | void wbt_done(struct rq_wb *, struct request *); | ||
98 | enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *); | ||
99 | int wbt_init(struct request_queue *); | 95 | int wbt_init(struct request_queue *); |
100 | void wbt_exit(struct request_queue *); | 96 | void wbt_update_limits(struct request_queue *); |
101 | void wbt_update_limits(struct rq_wb *); | ||
102 | void wbt_requeue(struct rq_wb *, struct request *); | ||
103 | void wbt_issue(struct rq_wb *, struct request *); | ||
104 | void wbt_disable_default(struct request_queue *); | 97 | void wbt_disable_default(struct request_queue *); |
105 | void wbt_enable_default(struct request_queue *); | 98 | void wbt_enable_default(struct request_queue *); |
106 | 99 | ||
107 | void wbt_set_queue_depth(struct rq_wb *, unsigned int); | 100 | u64 wbt_get_min_lat(struct request_queue *q); |
108 | void wbt_set_write_cache(struct rq_wb *, bool); | 101 | void wbt_set_min_lat(struct request_queue *q, u64 val); |
102 | |||
103 | void wbt_set_queue_depth(struct request_queue *, unsigned int); | ||
104 | void wbt_set_write_cache(struct request_queue *, bool); | ||
109 | 105 | ||
110 | u64 wbt_default_latency_nsec(struct request_queue *); | 106 | u64 wbt_default_latency_nsec(struct request_queue *); |
111 | 107 | ||
@@ -114,43 +110,30 @@ u64 wbt_default_latency_nsec(struct request_queue *); | |||
114 | static inline void wbt_track(struct request *rq, enum wbt_flags flags) | 110 | static inline void wbt_track(struct request *rq, enum wbt_flags flags) |
115 | { | 111 | { |
116 | } | 112 | } |
117 | static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags) | ||
118 | { | ||
119 | } | ||
120 | static inline void wbt_done(struct rq_wb *rwb, struct request *rq) | ||
121 | { | ||
122 | } | ||
123 | static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, | ||
124 | spinlock_t *lock) | ||
125 | { | ||
126 | return 0; | ||
127 | } | ||
128 | static inline int wbt_init(struct request_queue *q) | 113 | static inline int wbt_init(struct request_queue *q) |
129 | { | 114 | { |
130 | return -EINVAL; | 115 | return -EINVAL; |
131 | } | 116 | } |
132 | static inline void wbt_exit(struct request_queue *q) | 117 | static inline void wbt_update_limits(struct request_queue *q) |
133 | { | ||
134 | } | ||
135 | static inline void wbt_update_limits(struct rq_wb *rwb) | ||
136 | { | 118 | { |
137 | } | 119 | } |
138 | static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq) | 120 | static inline void wbt_disable_default(struct request_queue *q) |
139 | { | 121 | { |
140 | } | 122 | } |
141 | static inline void wbt_issue(struct rq_wb *rwb, struct request *rq) | 123 | static inline void wbt_enable_default(struct request_queue *q) |
142 | { | 124 | { |
143 | } | 125 | } |
144 | static inline void wbt_disable_default(struct request_queue *q) | 126 | static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth) |
145 | { | 127 | { |
146 | } | 128 | } |
147 | static inline void wbt_enable_default(struct request_queue *q) | 129 | static inline void wbt_set_write_cache(struct request_queue *q, bool wc) |
148 | { | 130 | { |
149 | } | 131 | } |
150 | static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) | 132 | static inline u64 wbt_get_min_lat(struct request_queue *q) |
151 | { | 133 | { |
134 | return 0; | ||
152 | } | 135 | } |
153 | static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc) | 136 | static inline void wbt_set_min_lat(struct request_queue *q, u64 val) |
154 | { | 137 | { |
155 | } | 138 | } |
156 | static inline u64 wbt_default_latency_nsec(struct request_queue *q) | 139 | static inline u64 wbt_default_latency_nsec(struct request_queue *q) |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9d05646d5059..137759862f07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -42,7 +42,7 @@ struct bsg_job; | |||
42 | struct blkcg_gq; | 42 | struct blkcg_gq; |
43 | struct blk_flush_queue; | 43 | struct blk_flush_queue; |
44 | struct pr_ops; | 44 | struct pr_ops; |
45 | struct rq_wb; | 45 | struct rq_qos; |
46 | struct blk_queue_stats; | 46 | struct blk_queue_stats; |
47 | struct blk_stat_callback; | 47 | struct blk_stat_callback; |
48 | 48 | ||
@@ -443,7 +443,7 @@ struct request_queue { | |||
443 | int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ | 443 | int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ |
444 | 444 | ||
445 | struct blk_queue_stats *stats; | 445 | struct blk_queue_stats *stats; |
446 | struct rq_wb *rq_wb; | 446 | struct rq_qos *rq_qos; |
447 | 447 | ||
448 | /* | 448 | /* |
449 | * If blkcg is not used, @q->root_rl serves all requests. If blkcg | 449 | * If blkcg is not used, @q->root_rl serves all requests. If blkcg |