blkio: Implement per cfq group latency target and busy queue avg

o So far we had a 300ms soft target latency system wide. Now, with the
  introduction of cfq groups, divide that latency by the number of groups so
  that one can come up with a group target latency, which will be helpful in
  determining the workload slice within a group and also the dynamic slice
  length of the cfq queue.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
author: Vivek Goyal <vgoyal@redhat.com> 2009-12-03 12:59:44 -0500
committer: Jens Axboe <jens.axboe@oracle.com> 2009-12-03 13:28:52 -0500
commit: 58ff82f34cded3812af5b6c69b6aa626b6be2490 (patch)
tree: 06098474c5763f20d1c9715faf67c83c56b9a787 /block
parent: 25bc6b07767fe77422312eda2af99c9477f76191 (diff)
1 files changed, 45 insertions, 20 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 019f28eea9df..84887e2eb210 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -82,6 +82,7 @@ struct cfq_rb_root {
        unsigned count;
        u64 min_vdisktime;
        struct rb_node *active;
+        unsigned total_weight;
 };
 #define CFQ_RB_ROOT     (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
@@ -172,6 +173,8 @@ struct cfq_group {
        /* number of cfqq currently on this group */
        int nr_cfqq;
+        /* Per group busy queus average. Useful for workload slice calc. */
+        unsigned int busy_queues_avg[2];
        /*
         * rr lists of queues with requests, onle rr for each priority class.
         * Counts are embedded in the cfq_rb_root
@@ -188,6 +191,8 @@ struct cfq_data {
        /* Root service tree for cfq_groups */
        struct cfq_rb_root grp_service_tree;
        struct cfq_group root_group;
+        /* Number of active cfq groups on group service tree */
+        int nr_groups;
        /*
         * The priority currently being served
@@ -206,7 +211,6 @@ struct cfq_data {
        struct rb_root prio_trees[CFQ_PRIO_LISTS];
        unsigned int busy_queues;
-        unsigned int busy_queues_avg[2];
        int rq_in_driver[2];
        int sync_flight;
@@ -354,10 +358,10 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
        return SYNC_WORKLOAD;
 }
-static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
+static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
+                                        struct cfq_data *cfqd,
+                                        struct cfq_group *cfqg)
 {
-        struct cfq_group *cfqg = &cfqd->root_group;
        if (wl == IDLE_WORKLOAD)
                return cfqg->service_tree_idle.count;
@@ -489,18 +493,27 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
 * to quickly follows sudden increases and decrease slowly
 */
-static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt)
+static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
+                                        struct cfq_group *cfqg, bool rt)
 {
        unsigned min_q, max_q;
        unsigned mult  = cfq_hist_divisor - 1;
        unsigned round = cfq_hist_divisor / 2;
-        unsigned busy = cfq_busy_queues_wl(rt, cfqd);
+        unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
-        min_q = min(cfqd->busy_queues_avg[rt], busy);
+        min_q = min(cfqg->busy_queues_avg[rt], busy);
-        max_q = max(cfqd->busy_queues_avg[rt], busy);
+        max_q = max(cfqg->busy_queues_avg[rt], busy);
-        cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
+        cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
                cfq_hist_divisor;
-        return cfqd->busy_queues_avg[rt];
+        return cfqg->busy_queues_avg[rt];
+}
+static inline unsigned
+cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+        struct cfq_rb_root *st = &cfqd->grp_service_tree;
+        return cfq_target_latency * cfqg->weight / st->total_weight;
 }
 static inline void
@@ -508,12 +521,17 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
        if (cfqd->cfq_latency) {
-                /* interested queues (we consider only the ones with the same
+                /*
-                 * priority class) */
+                 * interested queues (we consider only the ones with the same
-                unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));
+                 * priority class in the cfq group)
+                 */
+                unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
+                                                cfq_class_rt(cfqq));
                unsigned sync_slice = cfqd->cfq_slice[1];
                unsigned expect_latency = sync_slice * iq;
-                if (expect_latency > cfq_target_latency) {
+                unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
+                if (expect_latency > group_slice) {
                        unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
                        /* scale low_slice according to IO priority
                         * and sync vs async */
@@ -521,7 +539,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                                min(slice, base_low_slice * slice / sync_slice);
                        /* the adapted slice value is scaled to fit all iqs
                         * into the target latency */
-                        slice = max(slice * cfq_target_latency / expect_latency,
+                        slice = max(slice * group_slice / expect_latency,
                                    low_slice);
                }
        }
@@ -776,6 +794,8 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
        __cfq_group_service_tree_add(st, cfqg);
        cfqg->on_st = true;
+        cfqd->nr_groups++;
+        st->total_weight += cfqg->weight;
 }
 static void
@@ -794,6 +814,8 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
                return;
        cfqg->on_st = false;
+        cfqd->nr_groups--;
+        st->total_weight -= cfqg->weight;
        if (!RB_EMPTY_NODE(&cfqg->rb_node))
                cfq_rb_erase(&cfqg->rb_node, st);
 }
@@ -1639,6 +1661,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
        unsigned slice;
        unsigned count;
        struct cfq_rb_root *st;
+        unsigned group_slice;
        if (!cfqg) {
                cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1647,9 +1670,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
        }
        /* Choose next priority. RT > BE > IDLE */
-        if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+        if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
                cfqd->serving_prio = RT_WORKLOAD;
-        else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+        else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
                cfqd->serving_prio = BE_WORKLOAD;
        else {
                cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1687,9 +1710,11 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
         * proportional to the number of queues in that workload, over
         * all the queues in the same priority class
         */
-        slice = cfq_target_latency * count /
+        group_slice = cfq_group_slice(cfqd, cfqg);
-                max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
-                      cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+        slice = group_slice * count /
+                max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
+                      cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
        if (cfqd->serving_type == ASYNC_WORKLOAD)
                /* async workload slice is scaled down according to
author	Vivek Goyal <vgoyal@redhat.com>	2009-12-03 12:59:44 -0500
committer	Jens Axboe <jens.axboe@oracle.com>	2009-12-03 13:28:52 -0500
commit	58ff82f34cded3812af5b6c69b6aa626b6be2490 (patch)
tree	06098474c5763f20d1c9715faf67c83c56b9a787 /block
parent	25bc6b07767fe77422312eda2af99c9477f76191 (diff)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 019f28eea9df..84887e2eb210 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -82,6 +82,7 @@ struct cfq_rb_root {
82	unsigned count;	82	unsigned count;
83	u64 min_vdisktime;	83	u64 min_vdisktime;
84	struct rb_node *active;	84	struct rb_node *active;
		85	unsigned total_weight;
85	};	86	};
86	#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }	87	#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
87		88
@@ -172,6 +173,8 @@ struct cfq_group {
172	/* number of cfqq currently on this group */	173	/* number of cfqq currently on this group */
173	int nr_cfqq;	174	int nr_cfqq;
174		175
		176	/* Per group busy queus average. Useful for workload slice calc. */
		177	unsigned int busy_queues_avg[2];
175	/*	178	/*
176	* rr lists of queues with requests, onle rr for each priority class.	179	* rr lists of queues with requests, onle rr for each priority class.
177	* Counts are embedded in the cfq_rb_root	180	* Counts are embedded in the cfq_rb_root
@@ -188,6 +191,8 @@ struct cfq_data {
188	/* Root service tree for cfq_groups */	191	/* Root service tree for cfq_groups */
189	struct cfq_rb_root grp_service_tree;	192	struct cfq_rb_root grp_service_tree;
190	struct cfq_group root_group;	193	struct cfq_group root_group;
		194	/* Number of active cfq groups on group service tree */
		195	int nr_groups;
191		196
192	/*	197	/*
193	* The priority currently being served	198	* The priority currently being served
@@ -206,7 +211,6 @@ struct cfq_data {
206	struct rb_root prio_trees[CFQ_PRIO_LISTS];	211	struct rb_root prio_trees[CFQ_PRIO_LISTS];
207		212
208	unsigned int busy_queues;	213	unsigned int busy_queues;
209	unsigned int busy_queues_avg[2];
210		214
211	int rq_in_driver[2];	215	int rq_in_driver[2];
212	int sync_flight;	216	int sync_flight;
@@ -354,10 +358,10 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
354	return SYNC_WORKLOAD;	358	return SYNC_WORKLOAD;
355	}	359	}
356		360
357	static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)	361	static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
		362	struct cfq_data *cfqd,
		363	struct cfq_group *cfqg)
358	{	364	{
359	struct cfq_group *cfqg = &cfqd->root_group;
360
361	if (wl == IDLE_WORKLOAD)	365	if (wl == IDLE_WORKLOAD)
362	return cfqg->service_tree_idle.count;	366	return cfqg->service_tree_idle.count;
363		367
@@ -489,18 +493,27 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
489	* to quickly follows sudden increases and decrease slowly	493	* to quickly follows sudden increases and decrease slowly
490	*/	494	*/
491		495
492	static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt)	496	static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
		497	struct cfq_group *cfqg, bool rt)
493	{	498	{
494	unsigned min_q, max_q;	499	unsigned min_q, max_q;
495	unsigned mult = cfq_hist_divisor - 1;	500	unsigned mult = cfq_hist_divisor - 1;
496	unsigned round = cfq_hist_divisor / 2;	501	unsigned round = cfq_hist_divisor / 2;
497	unsigned busy = cfq_busy_queues_wl(rt, cfqd);	502	unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
498		503
499	min_q = min(cfqd->busy_queues_avg[rt], busy);	504	min_q = min(cfqg->busy_queues_avg[rt], busy);
500	max_q = max(cfqd->busy_queues_avg[rt], busy);	505	max_q = max(cfqg->busy_queues_avg[rt], busy);
501	cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /	506	cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
502	cfq_hist_divisor;	507	cfq_hist_divisor;
503	return cfqd->busy_queues_avg[rt];	508	return cfqg->busy_queues_avg[rt];
		509	}
		510
		511	static inline unsigned
		512	cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
		513	{
		514	struct cfq_rb_root *st = &cfqd->grp_service_tree;
		515
		516	return cfq_target_latency * cfqg->weight / st->total_weight;
504	}	517	}
505		518
506	static inline void	519	static inline void
@@ -508,12 +521,17 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
508	{	521	{
509	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);	522	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
510	if (cfqd->cfq_latency) {	523	if (cfqd->cfq_latency) {
511	/* interested queues (we consider only the ones with the same	524	/*
512	* priority class) */	525	* interested queues (we consider only the ones with the same
513	unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));	526	* priority class in the cfq group)
		527	*/
		528	unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
		529	cfq_class_rt(cfqq));
514	unsigned sync_slice = cfqd->cfq_slice[1];	530	unsigned sync_slice = cfqd->cfq_slice[1];
515	unsigned expect_latency = sync_slice * iq;	531	unsigned expect_latency = sync_slice * iq;
516	if (expect_latency > cfq_target_latency) {	532	unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
		533
		534	if (expect_latency > group_slice) {
517	unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;	535	unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
518	/* scale low_slice according to IO priority	536	/* scale low_slice according to IO priority
519	* and sync vs async */	537	* and sync vs async */
@@ -521,7 +539,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
521	min(slice, base_low_slice * slice / sync_slice);	539	min(slice, base_low_slice * slice / sync_slice);
522	/* the adapted slice value is scaled to fit all iqs	540	/* the adapted slice value is scaled to fit all iqs
523	* into the target latency */	541	* into the target latency */
524	slice = max(slice * cfq_target_latency / expect_latency,	542	slice = max(slice * group_slice / expect_latency,
525	low_slice);	543	low_slice);
526	}	544	}
527	}	545	}
@@ -776,6 +794,8 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
776		794
777	__cfq_group_service_tree_add(st, cfqg);	795	__cfq_group_service_tree_add(st, cfqg);
778	cfqg->on_st = true;	796	cfqg->on_st = true;
		797	cfqd->nr_groups++;
		798	st->total_weight += cfqg->weight;
779	}	799	}
780		800
781	static void	801	static void
@@ -794,6 +814,8 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
794	return;	814	return;
795		815
796	cfqg->on_st = false;	816	cfqg->on_st = false;
		817	cfqd->nr_groups--;
		818	st->total_weight -= cfqg->weight;
797	if (!RB_EMPTY_NODE(&cfqg->rb_node))	819	if (!RB_EMPTY_NODE(&cfqg->rb_node))
798	cfq_rb_erase(&cfqg->rb_node, st);	820	cfq_rb_erase(&cfqg->rb_node, st);
799	}	821	}
@@ -1639,6 +1661,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
1639	unsigned slice;	1661	unsigned slice;
1640	unsigned count;	1662	unsigned count;
1641	struct cfq_rb_root *st;	1663	struct cfq_rb_root *st;
		1664	unsigned group_slice;
1642		1665
1643	if (!cfqg) {	1666	if (!cfqg) {
1644	cfqd->serving_prio = IDLE_WORKLOAD;	1667	cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1647,9 +1670,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
1647	}	1670	}
1648		1671
1649	/* Choose next priority. RT > BE > IDLE */	1672	/* Choose next priority. RT > BE > IDLE */
1650	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))	1673	if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
1651	cfqd->serving_prio = RT_WORKLOAD;	1674	cfqd->serving_prio = RT_WORKLOAD;
1652	else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))	1675	else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
1653	cfqd->serving_prio = BE_WORKLOAD;	1676	cfqd->serving_prio = BE_WORKLOAD;
1654	else {	1677	else {
1655	cfqd->serving_prio = IDLE_WORKLOAD;	1678	cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1687,9 +1710,11 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
1687	* proportional to the number of queues in that workload, over	1710	* proportional to the number of queues in that workload, over
1688	* all the queues in the same priority class	1711	* all the queues in the same priority class
1689	*/	1712	*/
1690	slice = cfq_target_latency * count /	1713	group_slice = cfq_group_slice(cfqd, cfqg);
1691	max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],	1714
1692	cfq_busy_queues_wl(cfqd->serving_prio, cfqd));	1715	slice = group_slice * count /
		1716	max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
		1717	cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
1693		1718
1694	if (cfqd->serving_type == ASYNC_WORKLOAD)	1719	if (cfqd->serving_type == ASYNC_WORKLOAD)
1695	/* async workload slice is scaled down according to	1720	/* async workload slice is scaled down according to