author     Shaohua Li <shli@fb.com>     2017-03-27 18:19:42 -0400
committer  Jens Axboe <axboe@fb.com>    2017-03-28 10:02:20 -0400
commit     b9147dd1bae2b15d6931ecd42f8606c775fecbc9 (patch)
tree       9becbcfbf24e535538680bb53f38962808b4e28e /block/blk-throttle.c
parent     88eeca495ba7de749ff253376ec6be19bb05368d (diff)
blk-throttle: add a mechanism to estimate IO latency
The user configures a latency target, but the latency threshold for each request size isn't fixed. For an SSD, IO latency highly depends on request size. To calculate the latency threshold, we sample some data, e.g., the average latency for request sizes 4k, 8k, 16k, 32k .. 1M. The latency threshold for each request size will be the sampled latency (I'll call it the base latency) plus the latency target. For example, if the base latency for request size 4k is 80us and the user configures a latency target of 60us, the 4k latency threshold will be 80 + 60 = 140us.

To sample data, we calculate the order base 2 of the rounded-up IO sectors. If the IO size is bigger than 1M, it is accounted as 1M. Since the calculation rounds up, the base latency will be slightly smaller than the actual value. Also, if there isn't any IO dispatched for a specific IO size, we use the base latency of the next smaller IO size for it.

But we shouldn't sample data at just any time. The base latency is supposed to be the latency when the disk isn't congested, because we use the latency threshold to schedule IOs between cgroups. If the disk is congested, the latency is higher, and using it for scheduling is meaningless. Hence we only do the sampling when block throttling is at the LOW limit, with the assumption that the disk isn't congested in that state. If the assumption isn't true, e.g., the low limit is too high, the calculated latency threshold will be higher.

Hard disk is completely different: latency depends on spindle seek instead of request size. Currently this feature is SSD only; we probably could use a fixed threshold, like 4ms, for hard disk though.

Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
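As an illustration of the math above, here is a small userspace sketch (not kernel code) of the bucket/threshold calculation. bucket_index() mirrors the request_bucket_index() macro added by the patch; order_base_2() is reimplemented locally, and the base latencies, the 60us target, and the 16k example IO are made-up numbers:

/* Standalone sketch of the bucket/threshold math; values are illustrative. */
#include <stdio.h>

#define LATENCY_BUCKET_SIZE 9

static int order_base_2(unsigned long n)	/* ceil(log2(n)) */
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

static int bucket_index(unsigned long sectors)
{
	int idx = order_base_2(sectors) - 3;	/* 8 sectors (4k) -> bucket 0 */

	if (idx < 0)
		idx = 0;
	if (idx > LATENCY_BUCKET_SIZE - 1)
		idx = LATENCY_BUCKET_SIZE - 1;	/* >= 1M -> bucket 8 */
	return idx;
}

int main(void)
{
	/* hypothetical sampled base latencies (us) for 4k, 8k, ... 1M */
	unsigned long base_latency_us[LATENCY_BUCKET_SIZE] = {
		80, 90, 110, 140, 200, 300, 500, 900, 1700
	};
	unsigned long target_us = 60;		/* user-configured latency target */
	unsigned long io_bytes = 16 * 1024;	/* a 16k read */
	int idx = bucket_index(io_bytes / 512);	/* bytes -> 512-byte sectors */

	/* threshold = base latency of the bucket + latency target */
	printf("16k IO -> bucket %d, threshold %lu us\n",
	       idx, base_latency_us[idx] + target_us);
	return 0;
}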
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--  block/blk-throttle.c  | 166
1 file changed, 161 insertions(+), 5 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6e1c29860eec..140da29f5800 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -28,6 +28,8 @@ static int throtl_quantum = 32;
 /* default latency target is 0, eg, guarantee IO latency by default */
 #define DFL_LATENCY_TARGET (0)
 
+#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
+
 static struct blkcg_policy blkcg_policy_throtl;
 
 /* A workqueue to queue throttle related work */
@@ -165,6 +167,19 @@ struct throtl_grp {
 	unsigned long idletime_threshold; /* us */
 };
 
+/* We measure latency for request size from <= 4k to >= 1M */
+#define LATENCY_BUCKET_SIZE 9
+
+struct latency_bucket {
+	unsigned long total_latency; /* ns / 1024 */
+	int samples;
+};
+
+struct avg_latency_bucket {
+	unsigned long latency; /* ns / 1024 */
+	bool valid;
+};
+
 struct throtl_data
 {
 	/* service tree for active throtl groups */
@@ -188,6 +203,13 @@ struct throtl_data
 	unsigned long low_downgrade_time;
 
 	unsigned int scale;
+
+	struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
+	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
+	struct latency_bucket __percpu *latency_buckets;
+	unsigned long last_calculate_time;
+
+	bool track_bio_latency;
 };
 
 static void throtl_pending_timer_fn(unsigned long arg);
@@ -306,6 +328,9 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 	return ret;
 }
 
+#define request_bucket_index(sectors) \
+	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
+
 /**
  * throtl_log - log debug message via blktrace
  * @sq: the service_queue being reported
@@ -1931,6 +1956,73 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
 	tg->checked_last_finish_time = last_finish_time;
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_update_latency_buckets(struct throtl_data *td)
+{
+	struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
+	int i, cpu;
+	unsigned long last_latency = 0;
+	unsigned long latency;
+
+	if (!blk_queue_nonrot(td->queue))
+		return;
+	if (time_before(jiffies, td->last_calculate_time + HZ))
+		return;
+	td->last_calculate_time = jiffies;
+
+	memset(avg_latency, 0, sizeof(avg_latency));
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+		struct latency_bucket *tmp = &td->tmp_buckets[i];
+
+		for_each_possible_cpu(cpu) {
+			struct latency_bucket *bucket;
+
+			/* this isn't race free, but ok in practice */
+			bucket = per_cpu_ptr(td->latency_buckets, cpu);
+			tmp->total_latency += bucket[i].total_latency;
+			tmp->samples += bucket[i].samples;
+			bucket[i].total_latency = 0;
+			bucket[i].samples = 0;
+		}
+
+		if (tmp->samples >= 32) {
+			int samples = tmp->samples;
+
+			latency = tmp->total_latency;
+
+			tmp->total_latency = 0;
+			tmp->samples = 0;
+			latency /= samples;
+			if (latency == 0)
+				continue;
+			avg_latency[i].latency = latency;
+		}
+	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+		if (!avg_latency[i].latency) {
+			if (td->avg_buckets[i].latency < last_latency)
+				td->avg_buckets[i].latency = last_latency;
+			continue;
+		}
+
+		if (!td->avg_buckets[i].valid)
+			latency = avg_latency[i].latency;
+		else
+			latency = (td->avg_buckets[i].latency * 7 +
+				avg_latency[i].latency) >> 3;
+
+		td->avg_buckets[i].latency = max(latency, last_latency);
+		td->avg_buckets[i].valid = true;
+		last_latency = td->avg_buckets[i].latency;
+	}
+}
+#else
+static inline void throtl_update_latency_buckets(struct throtl_data *td)
+{
+}
+#endif
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
@@ -1939,6 +2031,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
 	bool throttled = false;
+	struct throtl_data *td = tg->td;
 	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -1949,6 +2042,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
 	spin_lock_irq(q->queue_lock);
 
+	throtl_update_latency_buckets(td);
+
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
@@ -1956,6 +2051,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (ret == 0 || ret == -EBUSY)
 		bio->bi_cg_private = tg;
+	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
 #endif
 	blk_throtl_update_idletime(tg);
 
@@ -1974,8 +2070,8 @@ again:
 		/* if above limits, break to queue */
 		if (!tg_may_dispatch(tg, bio, NULL)) {
 			tg->last_low_overflow_time[rw] = jiffies;
-			if (throtl_can_upgrade(tg->td, tg)) {
-				throtl_upgrade_state(tg->td);
+			if (throtl_can_upgrade(td, tg)) {
+				throtl_upgrade_state(td);
 				goto again;
 			}
 			break;
@@ -2019,7 +2115,7 @@ again:
 
 	tg->last_low_overflow_time[rw] = jiffies;
 
-	tg->td->nr_queued[rw]++;
+	td->nr_queued[rw]++;
 	throtl_add_bio_tg(bio, qn, tg);
 	throttled = true;
 
@@ -2044,20 +2140,67 @@ out:
 	 */
 	if (!throttled)
 		bio_clear_flag(bio, BIO_THROTTLED);
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	if (throttled || !td->track_bio_latency)
+		bio->bi_issue_stat.stat |= SKIP_LATENCY;
+#endif
 	return throttled;
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_track_latency(struct throtl_data *td, sector_t size,
+	int op, unsigned long time)
+{
+	struct latency_bucket *latency;
+	int index;
+
+	if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
+	    !blk_queue_nonrot(td->queue))
+		return;
+
+	index = request_bucket_index(size);
+
+	latency = get_cpu_ptr(td->latency_buckets);
+	latency[index].total_latency += time;
+	latency[index].samples++;
+	put_cpu_ptr(td->latency_buckets);
+}
+
+void blk_throtl_stat_add(struct request *rq, u64 time_ns)
+{
+	struct request_queue *q = rq->q;
+	struct throtl_data *td = q->td;
+
+	throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
+		req_op(rq), time_ns >> 10);
+}
+
 void blk_throtl_bio_endio(struct bio *bio)
 {
 	struct throtl_grp *tg;
+	u64 finish_time_ns;
+	unsigned long finish_time;
+	unsigned long start_time;
+	unsigned long lat;
 
 	tg = bio->bi_cg_private;
 	if (!tg)
 		return;
 	bio->bi_cg_private = NULL;
 
-	tg->last_finish_time = ktime_get_ns() >> 10;
+	finish_time_ns = ktime_get_ns();
+	tg->last_finish_time = finish_time_ns >> 10;
+
+	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
+	finish_time = __blk_stat_time(finish_time_ns) >> 10;
+	/* this is only for bio based driver */
+	if (start_time && finish_time > start_time &&
+	    !(bio->bi_issue_stat.stat & SKIP_LATENCY)) {
+		lat = finish_time - start_time;
+		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
+			bio_op(bio), lat);
+	}
 }
 #endif
 
@@ -2133,6 +2276,12 @@ int blk_throtl_init(struct request_queue *q)
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 	if (!td)
 		return -ENOMEM;
+	td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
+		LATENCY_BUCKET_SIZE, __alignof__(u64));
+	if (!td->latency_buckets) {
+		kfree(td);
+		return -ENOMEM;
+	}
 
 	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
 	throtl_service_queue_init(&td->service_queue);
@@ -2147,8 +2296,10 @@ int blk_throtl_init(struct request_queue *q)
 
 	/* activate policy */
 	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
-	if (ret)
+	if (ret) {
+		free_percpu(td->latency_buckets);
 		kfree(td);
+	}
 	return ret;
 }
 
@@ -2157,6 +2308,7 @@ void blk_throtl_exit(struct request_queue *q)
 	BUG_ON(!q->td);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+	free_percpu(q->td->latency_buckets);
 	kfree(q->td);
 }
 
@@ -2181,6 +2333,10 @@ void blk_throtl_register_queue(struct request_queue *q)
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
 #endif
 
+	td->track_bio_latency = !q->mq_ops && !q->request_fn;
+	if (!td->track_bio_latency)
+		blk_stat_enable_accounting(q);
+
 	/*
 	 * some tg are created before queue is fully initialized, eg, nonrot
 	 * isn't initialized yet