 block/blk-stat.c          |  15
 block/blk-stat.h          |   3
 block/blk-throttle.c      | 166
 block/blk.h               |   2
 include/linux/blk_types.h |   9
 5 files changed, 185 insertions(+), 10 deletions(-)
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 188b535cf4d6..e77ec52f5bb5 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -9,12 +9,14 @@
 
 #include "blk-stat.h"
 #include "blk-mq.h"
+#include "blk.h"
 
 #define BLK_RQ_STAT_BATCH	64
 
 struct blk_queue_stats {
 	struct list_head callbacks;
 	spinlock_t lock;
+	bool enable_accounting;
 };
 
 unsigned int blk_stat_rq_ddir(const struct request *rq)
@@ -96,6 +98,8 @@ void blk_stat_add(struct request *rq)
 
 	value = now - blk_stat_time(&rq->issue_stat);
 
+	blk_throtl_stat_add(rq, value);
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
 		if (blk_stat_is_active(cb)) {
@@ -190,7 +194,7 @@ void blk_stat_remove_callback(struct request_queue *q,
 {
 	spin_lock(&q->stats->lock);
 	list_del_rcu(&cb->list);
-	if (list_empty(&q->stats->callbacks))
+	if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
 		clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
 	spin_unlock(&q->stats->lock);
 
@@ -215,6 +219,14 @@ void blk_stat_free_callback(struct blk_stat_callback *cb)
 }
 EXPORT_SYMBOL_GPL(blk_stat_free_callback);
 
+void blk_stat_enable_accounting(struct request_queue *q)
+{
+	spin_lock(&q->stats->lock);
+	q->stats->enable_accounting = true;
+	set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+	spin_unlock(&q->stats->lock);
+}
+
 struct blk_queue_stats *blk_alloc_queue_stats(void)
 {
 	struct blk_queue_stats *stats;
@@ -225,6 +237,7 @@ struct blk_queue_stats *blk_alloc_queue_stats(void)
 
 	INIT_LIST_HEAD(&stats->callbacks);
 	spin_lock_init(&stats->lock);
+	stats->enable_accounting = false;
 
 	return stats;
 }
diff --git a/block/blk-stat.h b/block/blk-stat.h
index ee47f816d5bd..53f08a63bf15 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -108,6 +108,9 @@ static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
 		(((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
 }
 
+/* record time/size info in request but not add a callback */
+void blk_stat_enable_accounting(struct request_queue *q);
+
 /*
  * blk_stat_rq_ddir() - Bucket callback function for the request data direction.
  * @rq: Request.
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6e1c29860eec..140da29f5800 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -28,6 +28,8 @@ static int throtl_quantum = 32;
 /* default latency target is 0, eg, guarantee IO latency by default */
 #define DFL_LATENCY_TARGET (0)
 
+#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
+
 static struct blkcg_policy blkcg_policy_throtl;
 
 /* A workqueue to queue throttle related work */
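SKIP_LATENCY lives in the reserved bits of the packed blk_issue_stat word, so setting it does not disturb the stored issue time or request size. Below is a minimal userspace sketch of that packing, assuming the blk-stat.h layout of this kernel series (2 reserved bits, 12 size bits, 50 time bits); the constants are restated here for illustration only and are not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Assumed layout, mirroring blk-stat.h: top 2 bits reserved, next 12 hold
 * the (capped) request size, the low 50 hold the issue time. */
#define BLK_STAT_RES_BITS	2
#define BLK_STAT_SIZE_BITS	12
#define BLK_STAT_RES_SHIFT	(64 - BLK_STAT_RES_BITS)		  /* 62 */
#define BLK_STAT_SIZE_SHIFT	(BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS) /* 50 */
#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
#define SKIP_LATENCY		(1ULL << BLK_STAT_RES_SHIFT)

int main(void)
{
	uint64_t time_ns = 123456789ULL;	/* issue timestamp */
	uint64_t sectors = 2048;		/* a 1M request */
	uint64_t stat;

	/* pack time and size, then flag the bio as "skip latency tracking" */
	stat = (time_ns & BLK_STAT_TIME_MASK) | (sectors << BLK_STAT_SIZE_SHIFT);
	stat |= SKIP_LATENCY;

	printf("time=%llu size=%llu skip=%d\n",
	       (unsigned long long)(stat & BLK_STAT_TIME_MASK),
	       (unsigned long long)((stat >> BLK_STAT_SIZE_SHIFT) &
				    ((1ULL << BLK_STAT_SIZE_BITS) - 1)),
	       !!(stat & SKIP_LATENCY));
	return 0;
}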
@@ -165,6 +167,19 @@ struct throtl_grp {
 	unsigned long idletime_threshold; /* us */
 };
 
+/* We measure latency for request size from <= 4k to >= 1M */
+#define LATENCY_BUCKET_SIZE 9
+
+struct latency_bucket {
+	unsigned long total_latency; /* ns / 1024 */
+	int samples;
+};
+
+struct avg_latency_bucket {
+	unsigned long latency; /* ns / 1024 */
+	bool valid;
+};
+
 struct throtl_data
 {
 	/* service tree for active throtl groups */
@@ -188,6 +203,13 @@ struct throtl_data
 	unsigned long low_downgrade_time;
 
 	unsigned int scale;
+
+	struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
+	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
+	struct latency_bucket __percpu *latency_buckets;
+	unsigned long last_calculate_time;
+
+	bool track_bio_latency;
 };
 
 static void throtl_pending_timer_fn(unsigned long arg);
@@ -306,6 +328,9 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 	return ret;
 }
 
+#define request_bucket_index(sectors) \
+	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
+
 /**
  * throtl_log - log debug message via blktrace
  * @sq: the service_queue being reported
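A quick userspace illustration of how request_bucket_index() spreads request sizes over the nine buckets: with order_base_2() rounding up to the next power-of-two order, 8 sectors (4k) land in bucket 0 and anything from 2048 sectors (1M) upward saturates at bucket 8. The helpers below are reimplemented for the example and are not the kernel's versions.

#include <stdio.h>

#define LATENCY_BUCKET_SIZE 9

static int order_base_2(unsigned long n)	/* ceil(log2(n)) for n >= 1 */
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

static int request_bucket_index(unsigned long sectors)
{
	int idx = order_base_2(sectors) - 3;

	if (idx < 0)
		idx = 0;
	if (idx > LATENCY_BUCKET_SIZE - 1)
		idx = LATENCY_BUCKET_SIZE - 1;
	return idx;
}

int main(void)
{
	/* 8 sectors (4k) -> bucket 0, ..., 2048 sectors (1M) and larger -> bucket 8 */
	unsigned long sizes[] = { 8, 16, 64, 256, 2048, 8192 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%lu sectors -> bucket %d\n", sizes[i],
		       request_bucket_index(sizes[i]));
	return 0;
}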
@@ -1931,6 +1956,73 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
 	tg->checked_last_finish_time = last_finish_time;
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_update_latency_buckets(struct throtl_data *td)
+{
+	struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
+	int i, cpu;
+	unsigned long last_latency = 0;
+	unsigned long latency;
+
+	if (!blk_queue_nonrot(td->queue))
+		return;
+	if (time_before(jiffies, td->last_calculate_time + HZ))
+		return;
+	td->last_calculate_time = jiffies;
+
+	memset(avg_latency, 0, sizeof(avg_latency));
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+		struct latency_bucket *tmp = &td->tmp_buckets[i];
+
+		for_each_possible_cpu(cpu) {
+			struct latency_bucket *bucket;
+
+			/* this isn't race free, but ok in practice */
+			bucket = per_cpu_ptr(td->latency_buckets, cpu);
+			tmp->total_latency += bucket[i].total_latency;
+			tmp->samples += bucket[i].samples;
+			bucket[i].total_latency = 0;
+			bucket[i].samples = 0;
+		}
+
+		if (tmp->samples >= 32) {
+			int samples = tmp->samples;
+
+			latency = tmp->total_latency;
+
+			tmp->total_latency = 0;
+			tmp->samples = 0;
+			latency /= samples;
+			if (latency == 0)
+				continue;
+			avg_latency[i].latency = latency;
+		}
+	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+		if (!avg_latency[i].latency) {
+			if (td->avg_buckets[i].latency < last_latency)
+				td->avg_buckets[i].latency = last_latency;
+			continue;
+		}
+
+		if (!td->avg_buckets[i].valid)
+			latency = avg_latency[i].latency;
+		else
+			latency = (td->avg_buckets[i].latency * 7 +
+				avg_latency[i].latency) >> 3;
+
+		td->avg_buckets[i].latency = max(latency, last_latency);
+		td->avg_buckets[i].valid = true;
+		last_latency = td->avg_buckets[i].latency;
+	}
+}
+#else
+static inline void throtl_update_latency_buckets(struct throtl_data *td)
+{
+}
+#endif
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
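The per-bucket average above is smoothed with a 7/8-weight exponential moving average and then clamped so a larger-request bucket never reports a lower latency than the bucket below it. A small standalone sketch of that update step, with made-up sample values:

#include <stdio.h>

int main(void)
{
	unsigned long avg = 180;	  /* previous bucket average, ns/1024 */
	unsigned long sample = 260;	  /* fresh per-bucket average this round */
	unsigned long last_latency = 200; /* average of the previous (smaller) bucket */
	unsigned long latency;

	latency = (avg * 7 + sample) >> 3;	/* EMA: 7/8 old + 1/8 new */
	if (latency < last_latency)		/* bigger requests can't look faster */
		latency = last_latency;

	printf("updated bucket latency: %lu (ns/1024)\n", latency);
	return 0;
}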
@@ -1939,6 +2031,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
 	bool throttled = false;
+	struct throtl_data *td = tg->td;
 	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -1949,6 +2042,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
 	spin_lock_irq(q->queue_lock);
 
+	throtl_update_latency_buckets(td);
+
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
@@ -1956,6 +2051,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (ret == 0 || ret == -EBUSY)
 		bio->bi_cg_private = tg;
+	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
 #endif
 	blk_throtl_update_idletime(tg);
 
@@ -1974,8 +2070,8 @@ again:
 		/* if above limits, break to queue */
 		if (!tg_may_dispatch(tg, bio, NULL)) {
 			tg->last_low_overflow_time[rw] = jiffies;
-			if (throtl_can_upgrade(tg->td, tg)) {
-				throtl_upgrade_state(tg->td);
+			if (throtl_can_upgrade(td, tg)) {
+				throtl_upgrade_state(td);
 				goto again;
 			}
 			break;
@@ -2019,7 +2115,7 @@ again:
 
 	tg->last_low_overflow_time[rw] = jiffies;
 
-	tg->td->nr_queued[rw]++;
+	td->nr_queued[rw]++;
 	throtl_add_bio_tg(bio, qn, tg);
 	throttled = true;
 
@@ -2044,20 +2140,67 @@ out:
 	 */
 	if (!throttled)
 		bio_clear_flag(bio, BIO_THROTTLED);
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	if (throttled || !td->track_bio_latency)
+		bio->bi_issue_stat.stat |= SKIP_LATENCY;
+#endif
 	return throttled;
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_track_latency(struct throtl_data *td, sector_t size,
+	int op, unsigned long time)
+{
+	struct latency_bucket *latency;
+	int index;
+
+	if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
+	    !blk_queue_nonrot(td->queue))
+		return;
+
+	index = request_bucket_index(size);
+
+	latency = get_cpu_ptr(td->latency_buckets);
+	latency[index].total_latency += time;
+	latency[index].samples++;
+	put_cpu_ptr(td->latency_buckets);
+}
+
+void blk_throtl_stat_add(struct request *rq, u64 time_ns)
+{
+	struct request_queue *q = rq->q;
+	struct throtl_data *td = q->td;
+
+	throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
+		req_op(rq), time_ns >> 10);
+}
+
 void blk_throtl_bio_endio(struct bio *bio)
 {
 	struct throtl_grp *tg;
+	u64 finish_time_ns;
+	unsigned long finish_time;
+	unsigned long start_time;
+	unsigned long lat;
 
 	tg = bio->bi_cg_private;
 	if (!tg)
 		return;
 	bio->bi_cg_private = NULL;
 
-	tg->last_finish_time = ktime_get_ns() >> 10;
+	finish_time_ns = ktime_get_ns();
+	tg->last_finish_time = finish_time_ns >> 10;
+
+	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
+	finish_time = __blk_stat_time(finish_time_ns) >> 10;
+	/* this is only for bio based driver */
+	if (start_time && finish_time > start_time &&
+	    !(bio->bi_issue_stat.stat & SKIP_LATENCY)) {
+		lat = finish_time - start_time;
+		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
+			bio_op(bio), lat);
+	}
 }
 #endif
 
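Both timestamps in blk_throtl_bio_endio() are shifted down by 10 bits before subtracting, so latency samples are accumulated in ns/1024 units (a cheap approximation of microseconds), the same unit latency_bucket.total_latency uses. A standalone sketch of that sample calculation with made-up timestamps, simplified to leave out the blk_stat_time() masking:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical issue/completion timestamps in nanoseconds */
	uint64_t issue_ns  = 5000000000ULL;
	uint64_t finish_ns = 5001250000ULL;	/* completed 1.25 ms later */

	uint64_t start  = issue_ns >> 10;	/* ns/1024 units */
	uint64_t finish = finish_ns >> 10;
	uint64_t lat    = 0;

	if (start && finish > start)		/* same validity check as the endio path */
		lat = finish - start;

	printf("latency sample: %llu (ns/1024, ~%llu us)\n",
	       (unsigned long long)lat,
	       (unsigned long long)(lat * 1024 / 1000));
	return 0;
}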
@@ -2133,6 +2276,12 @@ int blk_throtl_init(struct request_queue *q)
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 	if (!td)
 		return -ENOMEM;
+	td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
+		LATENCY_BUCKET_SIZE, __alignof__(u64));
+	if (!td->latency_buckets) {
+		kfree(td);
+		return -ENOMEM;
+	}
 
 	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
 	throtl_service_queue_init(&td->service_queue);
@@ -2147,8 +2296,10 @@ int blk_throtl_init(struct request_queue *q)
 
 	/* activate policy */
 	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
-	if (ret)
+	if (ret) {
+		free_percpu(td->latency_buckets);
 		kfree(td);
+	}
 	return ret;
 }
 
@@ -2157,6 +2308,7 @@ void blk_throtl_exit(struct request_queue *q)
 	BUG_ON(!q->td);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+	free_percpu(q->td->latency_buckets);
 	kfree(q->td);
 }
 
@@ -2181,6 +2333,10 @@ void blk_throtl_register_queue(struct request_queue *q)
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
 #endif
 
+	td->track_bio_latency = !q->mq_ops && !q->request_fn;
+	if (!td->track_bio_latency)
+		blk_stat_enable_accounting(q);
+
 	/*
 	 * some tg are created before queue is fully initialized, eg, nonrot
 	 * isn't initialized yet
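The registration hook above decides where latency samples come from: bio-based drivers (neither mq_ops nor request_fn set) are timed in blk_throtl_bio_endio(), while request-based drivers piggyback on blk-stat request accounting, which is why blk_stat_enable_accounting() is turned on for them. A toy illustration of that predicate; fake_queue is a stand-in type for this example, not a kernel structure:

#include <stdbool.h>
#include <stdio.h>

struct fake_queue {			/* stand-in for struct request_queue */
	void *mq_ops;
	void *request_fn;
};

static bool track_bio_latency(const struct fake_queue *q)
{
	return !q->mq_ops && !q->request_fn;	/* true only for bio-based drivers */
}

int main(void)
{
	struct fake_queue biodrv = { NULL, NULL };
	struct fake_queue mqdrv  = { (void *)1, NULL };

	printf("bio-based: track in endio = %d\n", track_bio_latency(&biodrv));
	printf("blk-mq:    track in endio = %d (uses blk-stat instead)\n",
	       track_bio_latency(&mqdrv));
	return 0;
}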
diff --git a/block/blk.h b/block/blk.h
index 3ac833ec2adb..07d375183f31 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -331,8 +331,10 @@ extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
 extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
 	const char *page, size_t count);
 extern void blk_throtl_bio_endio(struct bio *bio);
+extern void blk_throtl_stat_add(struct request *rq, u64 time);
 #else
 static inline void blk_throtl_bio_endio(struct bio *bio) { }
+static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif
 
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3ad567347671..67bcf8a5326e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,10 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
+struct blk_issue_stat {
+	u64 stat;
+};
+
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
@@ -60,6 +64,7 @@ struct bio {
 	struct cgroup_subsys_state *bi_css;
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	void *bi_cg_private;
+	struct blk_issue_stat bi_issue_stat;
 #endif
 #endif
 	union {
@@ -286,10 +291,6 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
 	return (cookie & BLK_QC_T_INTERNAL) != 0;
 }
 
-struct blk_issue_stat {
-	u64 stat;
-};
-
 struct blk_rq_stat {
 	s64 mean;
 	u64 min;