Diffstat (limited to 'block')
-rw-r--r-- | block/cfq-iosched.c | 65
1 file changed, 45 insertions, 20 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 019f28eea9df..84887e2eb210 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -82,6 +82,7 @@ struct cfq_rb_root {
 	unsigned count;
 	u64 min_vdisktime;
 	struct rb_node *active;
+	unsigned total_weight;
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
 
@@ -172,6 +173,8 @@ struct cfq_group {
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
 
+	/* Per group busy queus average. Useful for workload slice calc. */
+	unsigned int busy_queues_avg[2];
 	/*
 	 * rr lists of queues with requests, onle rr for each priority class.
 	 * Counts are embedded in the cfq_rb_root
@@ -188,6 +191,8 @@ struct cfq_data {
 	/* Root service tree for cfq_groups */
 	struct cfq_rb_root grp_service_tree;
 	struct cfq_group root_group;
+	/* Number of active cfq groups on group service tree */
+	int nr_groups;
 
 	/*
 	 * The priority currently being served
@@ -206,7 +211,6 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
-	unsigned int busy_queues_avg[2];
 
 	int rq_in_driver[2];
 	int sync_flight;
@@ -354,10 +358,10 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
 	return SYNC_WORKLOAD;
 }
 
-static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
+static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
+					struct cfq_data *cfqd,
+					struct cfq_group *cfqg)
 {
-	struct cfq_group *cfqg = &cfqd->root_group;
-
 	if (wl == IDLE_WORKLOAD)
 		return cfqg->service_tree_idle.count;
 
@@ -489,18 +493,27 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
  * to quickly follows sudden increases and decrease slowly
  */
 
-static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt)
+static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
+					struct cfq_group *cfqg, bool rt)
 {
 	unsigned min_q, max_q;
 	unsigned mult = cfq_hist_divisor - 1;
 	unsigned round = cfq_hist_divisor / 2;
-	unsigned busy = cfq_busy_queues_wl(rt, cfqd);
+	unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
 
-	min_q = min(cfqd->busy_queues_avg[rt], busy);
-	max_q = max(cfqd->busy_queues_avg[rt], busy);
-	cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
+	min_q = min(cfqg->busy_queues_avg[rt], busy);
+	max_q = max(cfqg->busy_queues_avg[rt], busy);
+	cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
 		cfq_hist_divisor;
-	return cfqd->busy_queues_avg[rt];
+	return cfqg->busy_queues_avg[rt];
+}
+
+static inline unsigned
+cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+	struct cfq_rb_root *st = &cfqd->grp_service_tree;
+
+	return cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
 static inline void
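
The new cfq_group_slice() helper carves the latency window up among the busy groups in proportion to weight, using the total_weight now kept on the group service tree, and the busy-queue average that drives slice sizing is likewise tracked per group. A rough worked example (the 300 ms cfq_target_latency and a group weight of 500 are assumed defaults for illustration, not part of this patch):

	group A weight 500, group B weight 1000  ->  total_weight = 1500
	group A slice = 300 * 500  / 1500 = 100 ms
	group B slice = 300 * 1000 / 1500 = 200 ms

With a single busy group on the tree, total_weight equals that group's own weight and the group keeps the full latency target.
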
@@ -508,12 +521,17 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
 	if (cfqd->cfq_latency) {
-		/* interested queues (we consider only the ones with the same
-		 * priority class) */
-		unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));
+		/*
+		 * interested queues (we consider only the ones with the same
+		 * priority class in the cfq group)
+		 */
+		unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
+						cfq_class_rt(cfqq));
 		unsigned sync_slice = cfqd->cfq_slice[1];
 		unsigned expect_latency = sync_slice * iq;
-		if (expect_latency > cfq_target_latency) {
+		unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
+
+		if (expect_latency > group_slice) {
 			unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
 			/* scale low_slice according to IO priority
 			 * and sync vs async */
@@ -521,7 +539,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 				min(slice, base_low_slice * slice / sync_slice);
 			/* the adapted slice value is scaled to fit all iqs
 			 * into the target latency */
-			slice = max(slice * cfq_target_latency / expect_latency,
+			slice = max(slice * group_slice / expect_latency,
 				    low_slice);
 		}
 	}
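
After this change, cfq_set_prio_slice() compares the expected latency against the group's share of the latency target rather than the global target. Continuing the illustrative numbers above (all values assumed for the example): with group_slice = 100 ms, sync_slice = 100 ms and an averaged queue count iq = 4, expect_latency = 400 ms exceeds group_slice, so each queue's slice is scaled down by roughly group_slice / expect_latency = 1/4, subject to the low_slice floor derived from cfq_slice_idle.
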
@@ -776,6 +794,8 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
 	__cfq_group_service_tree_add(st, cfqg);
 	cfqg->on_st = true;
+	cfqd->nr_groups++;
+	st->total_weight += cfqg->weight;
 }
 
 static void
@@ -794,6 +814,8 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 
 	cfqg->on_st = false;
+	cfqd->nr_groups--;
+	st->total_weight -= cfqg->weight;
 	if (!RB_EMPTY_NODE(&cfqg->rb_node))
 		cfq_rb_erase(&cfqg->rb_node, st);
 }
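
The add/del paths keep st->total_weight (and nr_groups) in step with the groups actually on the service tree, so cfq_group_slice() divides the latency target only among currently busy groups. For illustration (weights assumed as before): three busy groups of weight 500 give total_weight = 1500 and a 100 ms share each; once one of them empties and is deleted from the tree, total_weight drops to 1000 and each remaining share grows to 150 ms.
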
@@ -1639,6 +1661,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	unsigned slice;
 	unsigned count;
 	struct cfq_rb_root *st;
+	unsigned group_slice;
 
 	if (!cfqg) {
 		cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1647,9 +1670,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	}
 
 	/* Choose next priority. RT > BE > IDLE */
-	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+	if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
 		cfqd->serving_prio = RT_WORKLOAD;
-	else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+	else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
 		cfqd->serving_prio = BE_WORKLOAD;
 	else {
 		cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1687,9 +1710,11 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	 * proportional to the number of queues in that workload, over
 	 * all the queues in the same priority class
 	 */
-	slice = cfq_target_latency * count /
-		max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
-		      cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+	group_slice = cfq_group_slice(cfqd, cfqg);
+
+	slice = group_slice * count /
+		max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
+		      cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
 
 	if (cfqd->serving_type == ASYNC_WORKLOAD)
 		/* async workload slice is scaled down according to
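
Finally, choose_service_tree() now distributes the group's slice, rather than the global latency target, across the workload types within that group. Sticking with the assumed numbers above: a group_slice of 100 ms, count = 2 queues in the serving workload and 4 busy queues of that priority class in the group yields a workload slice of 100 * 2 / 4 = 50 ms, which the following lines then scale down further for async workloads.
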