cfq-iosched: fairness for sync no-idle queues

Currently no-idle queues in cfq are not serviced fairly: even if they can only dispatch a small number of requests at a time, they have to compete with idling queues to be serviced, experiencing large latencies. We should notice, instead, that no-idle queues are the ones that would benefit most from having low latency, in fact they are any of: * processes with large think times (e.g. interactive ones like file managers) * seeky (e.g. programs faulting in their code at startup) * or marked as no-idle from upper levels, to improve latencies of those requests. This patch improves the fairness and latency for those queues, by: * separating sync idle, sync no-idle and async queues in separate service_trees, for each priority * service all no-idle queues together * and idling when the last no-idle queue has been serviced, to anticipate for more no-idle work * the timeslices allotted for idle and no-idle service_trees are computed proportionally to the number of processes in each set. Servicing all no-idle queues together should have a performance boost for NCQ-capable drives, without compromising fairness. Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
author: Corrado Zoccolo <czoccolo@gmail.com> 2009-10-26 17:45:29 -0400
committer: Jens Axboe <jens.axboe@oracle.com> 2009-10-28 04:23:26 -0400
commit: 718eee0579b802aabe3bafacf09d0a9b0830f1dd (patch)
tree: 3a85a6d38ed7b68ed6ca21d04158afee13980e5e /block/cfq-iosched.c
parent: a6d44e982d3734583b3b4e1d36921af8cfd61fc0 (diff)
1 files changed, 168 insertions, 32 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 76afa3696894..859f534ae9ef 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -134,7 +134,7 @@ struct cfq_queue {
 };
 /*
- * Index in the service_trees.
+ * First index in the service_trees.
 * IDLE is handled separately, so it has negative index
 */
 enum wl_prio_t {
@@ -144,6 +144,16 @@ enum wl_prio_t {
 };
 /*
+ * Second index in the service_trees.
+ */
+enum wl_type_t {
+        ASYNC_WORKLOAD = 0,
+        SYNC_NOIDLE_WORKLOAD = 1,
+        SYNC_WORKLOAD = 2
+};
+/*
 * Per block device queue structure
 */
 struct cfq_data {
@@ -153,12 +163,14 @@ struct cfq_data {
         * rr lists of queues with requests, onle rr for each priority class.
         * Counts are embedded in the cfq_rb_root
         */
-        struct cfq_rb_root service_trees[2];
+        struct cfq_rb_root service_trees[2][3];
        struct cfq_rb_root service_tree_idle;
        /*
         * The priority currently being served
         */
        enum wl_prio_t serving_prio;
+        enum wl_type_t serving_type;
+        unsigned long workload_expires;
        /*
         * Each priority tree is sorted by next_request position.  These
@@ -221,12 +233,13 @@ struct cfq_data {
 };
 static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio,
+                                            enum wl_type_t type,
                                            struct cfq_data *cfqd)
 {
        if (prio == IDLE_WORKLOAD)
                return &cfqd->service_tree_idle;
-        return &cfqd->service_trees[prio];
+        return &cfqd->service_trees[prio][type];
 }
 enum cfqq_state_flags {
@@ -282,12 +295,24 @@ static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
        return BE_WORKLOAD;
 }
+static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
+{
+        if (!cfq_cfqq_sync(cfqq))
+                return ASYNC_WORKLOAD;
+        if (!cfq_cfqq_idle_window(cfqq))
+                return SYNC_NOIDLE_WORKLOAD;
+        return SYNC_WORKLOAD;
+}
 static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
 {
        if (wl == IDLE_WORKLOAD)
                return cfqd->service_tree_idle.count;
-        return cfqd->service_trees[wl].count;
+        return cfqd->service_trees[wl][ASYNC_WORKLOAD].count
+                + cfqd->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
+                + cfqd->service_trees[wl][SYNC_WORKLOAD].count;
 }
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
@@ -597,7 +622,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        struct cfq_rb_root *service_tree;
        int left;
-        service_tree = service_tree_for(cfqq_prio(cfqq), cfqd);
+        service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd);
        if (cfq_class_idle(cfqq)) {
                rb_key = CFQ_IDLE_DELAY;
                parent = rb_last(&service_tree->rb);
@@ -1030,7 +1055,7 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
        struct cfq_rb_root *service_tree =
-                service_tree_for(cfqd->serving_prio, cfqd);
+                service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd);
        if (RB_EMPTY_ROOT(&service_tree->rb))
                return NULL;
@@ -1167,7 +1192,7 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        enum wl_prio_t prio = cfqq_prio(cfqq);
-        struct cfq_rb_root *service_tree;
+        struct cfq_rb_root *service_tree = cfqq->service_tree;
        /* We never do for idle class queues. */
        if (prio == IDLE_WORKLOAD)
@@ -1181,7 +1206,9 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * Otherwise, we do only if they are the last ones
         * in their service tree.
         */
-        service_tree = service_tree_for(prio, cfqd);
+        if (!service_tree)
+                service_tree = service_tree_for(prio, cfqq_type(cfqq), cfqd);
        if (service_tree->count == 0)
                return true;
@@ -1235,14 +1262,20 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
        cfq_mark_cfqq_wait_request(cfqq);
-        /*
-         * we don't want to idle for seeks, but we do want to allow
-         * fair distribution of slice time for a process doing back-to-back
-         * seeks. so allow a little bit of time for him to submit a new rq
-         */
        sl = cfqd->cfq_slice_idle;
-        if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
+        /* are we servicing noidle tree, and there are more queues?
+         * non-rotational or NCQ: no idle
+         * non-NCQ rotational : very small idle, to allow
+         *     fair distribution of slice time for a process doing back-to-back
+         *     seeks.
+         */
+        if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
+            service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WORKLOAD, cfqd)
+                ->count > 0) {
+                if (blk_queue_nonrot(cfqd->queue) || cfqd->hw_tag)
+                        return;
                sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
+        }
        mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
        cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
@@ -1346,6 +1379,106 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
        }
 }
+static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio,
+                                    bool prio_changed)
+{
+        struct cfq_queue *queue;
+        int i;
+        bool key_valid = false;
+        unsigned long lowest_key = 0;
+        enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
+        if (prio_changed) {
+                /*
+                 * When priorities switched, we prefer starting
+                 * from SYNC_NOIDLE (first choice), or just SYNC
+                 * over ASYNC
+                 */
+                if (service_tree_for(prio, cur_best, cfqd)->count)
+                        return cur_best;
+                cur_best = SYNC_WORKLOAD;
+                if (service_tree_for(prio, cur_best, cfqd)->count)
+                        return cur_best;
+                return ASYNC_WORKLOAD;
+        }
+        for (i = 0; i < 3; ++i) {
+                /* otherwise, select the one with lowest rb_key */
+                queue = cfq_rb_first(service_tree_for(prio, i, cfqd));
+                if (queue &&
+                    (!key_valid || time_before(queue->rb_key, lowest_key))) {
+                        lowest_key = queue->rb_key;
+                        cur_best = i;
+                        key_valid = true;
+                }
+        }
+        return cur_best;
+}
+static void choose_service_tree(struct cfq_data *cfqd)
+{
+        enum wl_prio_t previous_prio = cfqd->serving_prio;
+        bool prio_changed;
+        unsigned slice;
+        unsigned count;
+        /* Choose next priority. RT > BE > IDLE */
+        if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+                cfqd->serving_prio = RT_WORKLOAD;
+        else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+                cfqd->serving_prio = BE_WORKLOAD;
+        else {
+                cfqd->serving_prio = IDLE_WORKLOAD;
+                cfqd->workload_expires = jiffies + 1;
+                return;
+        }
+        /*
+         * For RT and BE, we have to choose also the type
+         * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
+         * expiration time
+         */
+        prio_changed = (cfqd->serving_prio != previous_prio);
+        count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
+                ->count;
+        /*
+         * If priority didn't change, check workload expiration,
+         * and that we still have other queues ready
+         */
+        if (!prio_changed && count &&
+            !time_after(jiffies, cfqd->workload_expires))
+                return;
+        /* otherwise select new workload type */
+        cfqd->serving_type =
+                cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed);
+        count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
+                ->count;
+        /*
+         * the workload slice is computed as a fraction of target latency
+         * proportional to the number of queues in that workload, over
+         * all the queues in the same priority class
+         */
+        slice = cfq_target_latency * count /
+                max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
+                      cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+        if (cfqd->serving_type == ASYNC_WORKLOAD)
+                /* async workload slice is scaled down according to
+                 * the sync/async slice ratio. */
+                slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
+        else
+                /* sync workload slice is at least 2 * cfq_slice_idle */
+                slice = max(slice, 2 * cfqd->cfq_slice_idle);
+        slice = max_t(unsigned, slice, CFQ_MIN_TT);
+        cfqd->workload_expires = jiffies + slice;
+}
 /*
 * Select a queue for service. If we have a current active queue,
 * check whether to continue servicing it, or retrieve and set a new one.
@@ -1398,14 +1531,13 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 expire:
        cfq_slice_expired(cfqd, 0);
 new_queue:
-        if (!new_cfqq) {
+        /*
-                if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+         * Current queue expired. Check if we have to switch to a new
-                        cfqd->serving_prio = RT_WORKLOAD;
+         * service tree
-                else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+         */
-                        cfqd->serving_prio = BE_WORKLOAD;
+        if (!new_cfqq)
-                else
+                choose_service_tree(cfqd);
-                        cfqd->serving_prio = IDLE_WORKLOAD;
-        }
        cfqq = cfq_set_active_queue(cfqd, new_cfqq);
 keep_queue:
        return cfqq;
@@ -1432,10 +1564,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 {
        struct cfq_queue *cfqq;
        int dispatched = 0;
-        int i;
+        int i, j;
        for (i = 0; i < 2; ++i)
-                while ((cfqq = cfq_rb_first(&cfqd->service_trees[i])) != NULL)
+                for (j = 0; j < 3; ++j)
-                        dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+                        while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j]))
+                                != NULL)
+                                dispatched += __cfq_forced_dispatch_cfqq(cfqq);
        while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)
                dispatched += __cfq_forced_dispatch_cfqq(cfqq);
@@ -2218,13 +2352,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
        if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-            (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq)))
+            (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)))
                enable_idle = 0;
        else if (sample_valid(cic->ttime_samples)) {
-                unsigned int slice_idle = cfqd->cfq_slice_idle;
+                if (cic->ttime_mean > cfqd->cfq_slice_idle)
-                if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
-                        slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
-                if (cic->ttime_mean > slice_idle)
                        enable_idle = 0;
                else
                        enable_idle = 1;
@@ -2262,6 +2393,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
        if (cfq_class_idle(cfqq))
                return true;
+        if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD
+            && new_cfqq->service_tree == cfqq->service_tree)
+                return true;
        /*
         * if the new request is sync, but the currently running queue is
         * not, let the sync request have priority.
@@ -2778,14 +2913,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static void *cfq_init_queue(struct request_queue *q)
 {
        struct cfq_data *cfqd;
-        int i;
+        int i, j;
        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
        if (!cfqd)
                return NULL;
        for (i = 0; i < 2; ++i)
-                cfqd->service_trees[i] = CFQ_RB_ROOT;
+                for (j = 0; j < 3; ++j)
+                        cfqd->service_trees[i][j] = CFQ_RB_ROOT;
        cfqd->service_tree_idle = CFQ_RB_ROOT;
        /*
author	Corrado Zoccolo <czoccolo@gmail.com>	2009-10-26 17:45:29 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2009-10-28 04:23:26 -0400
commit	718eee0579b802aabe3bafacf09d0a9b0830f1dd (patch)
tree	3a85a6d38ed7b68ed6ca21d04158afee13980e5e /block/cfq-iosched.c
parent	a6d44e982d3734583b3b4e1d36921af8cfd61fc0 (diff)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 76afa3696894..859f534ae9ef 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c
@@ -134,7 +134,7 @@ struct cfq_queue {
134	};	134	};
135		135
136	/*	136	/*
137	* Index in the service_trees.	137	* First index in the service_trees.
138	* IDLE is handled separately, so it has negative index	138	* IDLE is handled separately, so it has negative index
139	*/	139	*/
140	enum wl_prio_t {	140	enum wl_prio_t {
@@ -144,6 +144,16 @@ enum wl_prio_t {
144	};	144	};
145		145
146	/*	146	/*
		147	* Second index in the service_trees.
		148	*/
		149	enum wl_type_t {
		150	ASYNC_WORKLOAD = 0,
		151	SYNC_NOIDLE_WORKLOAD = 1,
		152	SYNC_WORKLOAD = 2
		153	};
		154
		155
		156	/*
147	* Per block device queue structure	157	* Per block device queue structure
148	*/	158	*/
149	struct cfq_data {	159	struct cfq_data {
@@ -153,12 +163,14 @@ struct cfq_data {
153	* rr lists of queues with requests, onle rr for each priority class.	163	* rr lists of queues with requests, onle rr for each priority class.
154	* Counts are embedded in the cfq_rb_root	164	* Counts are embedded in the cfq_rb_root
155	*/	165	*/
156	struct cfq_rb_root service_trees[2];	166	struct cfq_rb_root service_trees[2][3];
157	struct cfq_rb_root service_tree_idle;	167	struct cfq_rb_root service_tree_idle;
158	/*	168	/*
159	* The priority currently being served	169	* The priority currently being served
160	*/	170	*/
161	enum wl_prio_t serving_prio;	171	enum wl_prio_t serving_prio;
		172	enum wl_type_t serving_type;
		173	unsigned long workload_expires;
162		174
163	/*	175	/*
164	* Each priority tree is sorted by next_request position. These	176	* Each priority tree is sorted by next_request position. These
@@ -221,12 +233,13 @@ struct cfq_data {
221	};	233	};
222		234
223	static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio,	235	static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio,
		236	enum wl_type_t type,
224	struct cfq_data *cfqd)	237	struct cfq_data *cfqd)
225	{	238	{
226	if (prio == IDLE_WORKLOAD)	239	if (prio == IDLE_WORKLOAD)
227	return &cfqd->service_tree_idle;	240	return &cfqd->service_tree_idle;
228		241
229	return &cfqd->service_trees[prio];	242	return &cfqd->service_trees[prio][type];
230	}	243	}
231		244
232	enum cfqq_state_flags {	245	enum cfqq_state_flags {
@@ -282,12 +295,24 @@ static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
282	return BE_WORKLOAD;	295	return BE_WORKLOAD;
283	}	296	}
284		297
		298
		299	static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
		300	{
		301	if (!cfq_cfqq_sync(cfqq))
		302	return ASYNC_WORKLOAD;
		303	if (!cfq_cfqq_idle_window(cfqq))
		304	return SYNC_NOIDLE_WORKLOAD;
		305	return SYNC_WORKLOAD;
		306	}
		307
285	static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)	308	static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
286	{	309	{
287	if (wl == IDLE_WORKLOAD)	310	if (wl == IDLE_WORKLOAD)
288	return cfqd->service_tree_idle.count;	311	return cfqd->service_tree_idle.count;
289		312
290	return cfqd->service_trees[wl].count;	313	return cfqd->service_trees[wl][ASYNC_WORKLOAD].count
		314	+ cfqd->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
		315	+ cfqd->service_trees[wl][SYNC_WORKLOAD].count;
291	}	316	}
292		317
293	static void cfq_dispatch_insert(struct request_queue , struct request );	318	static void cfq_dispatch_insert(struct request_queue , struct request );
@@ -597,7 +622,7 @@ static void cfq_service_tree_add(struct cfq_data cfqd, struct cfq_queue cfqq,
597	struct cfq_rb_root *service_tree;	622	struct cfq_rb_root *service_tree;
598	int left;	623	int left;
599		624
600	service_tree = service_tree_for(cfqq_prio(cfqq), cfqd);	625	service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd);
601	if (cfq_class_idle(cfqq)) {	626	if (cfq_class_idle(cfqq)) {
602	rb_key = CFQ_IDLE_DELAY;	627	rb_key = CFQ_IDLE_DELAY;
603	parent = rb_last(&service_tree->rb);	628	parent = rb_last(&service_tree->rb);
@@ -1030,7 +1055,7 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
1030	static struct cfq_queue cfq_get_next_queue(struct cfq_data cfqd)	1055	static struct cfq_queue cfq_get_next_queue(struct cfq_data cfqd)
1031	{	1056	{
1032	struct cfq_rb_root *service_tree =	1057	struct cfq_rb_root *service_tree =
1033	service_tree_for(cfqd->serving_prio, cfqd);	1058	service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd);
1034		1059
1035	if (RB_EMPTY_ROOT(&service_tree->rb))	1060	if (RB_EMPTY_ROOT(&service_tree->rb))
1036	return NULL;	1061	return NULL;
@@ -1167,7 +1192,7 @@ static struct cfq_queue cfq_close_cooperator(struct cfq_data cfqd,
1167	static bool cfq_should_idle(struct cfq_data cfqd, struct cfq_queue cfqq)	1192	static bool cfq_should_idle(struct cfq_data cfqd, struct cfq_queue cfqq)
1168	{	1193	{
1169	enum wl_prio_t prio = cfqq_prio(cfqq);	1194	enum wl_prio_t prio = cfqq_prio(cfqq);
1170	struct cfq_rb_root *service_tree;	1195	struct cfq_rb_root *service_tree = cfqq->service_tree;
1171		1196
1172	/* We never do for idle class queues. */	1197	/* We never do for idle class queues. */
1173	if (prio == IDLE_WORKLOAD)	1198	if (prio == IDLE_WORKLOAD)
@@ -1181,7 +1206,9 @@ static bool cfq_should_idle(struct cfq_data cfqd, struct cfq_queue cfqq)
1181	* Otherwise, we do only if they are the last ones	1206	* Otherwise, we do only if they are the last ones
1182	* in their service tree.	1207	* in their service tree.
1183	*/	1208	*/
1184	service_tree = service_tree_for(prio, cfqd);	1209	if (!service_tree)
		1210	service_tree = service_tree_for(prio, cfqq_type(cfqq), cfqd);
		1211
1185	if (service_tree->count == 0)	1212	if (service_tree->count == 0)
1186	return true;	1213	return true;
1187		1214
@@ -1235,14 +1262,20 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
1235		1262
1236	cfq_mark_cfqq_wait_request(cfqq);	1263	cfq_mark_cfqq_wait_request(cfqq);
1237		1264
1238	/*
1239	* we don't want to idle for seeks, but we do want to allow
1240	* fair distribution of slice time for a process doing back-to-back
1241	* seeks. so allow a little bit of time for him to submit a new rq
1242	*/
1243	sl = cfqd->cfq_slice_idle;	1265	sl = cfqd->cfq_slice_idle;
1244	if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))	1266	/* are we servicing noidle tree, and there are more queues?
		1267	* non-rotational or NCQ: no idle
		1268	* non-NCQ rotational : very small idle, to allow
		1269	* fair distribution of slice time for a process doing back-to-back
		1270	* seeks.
		1271	*/
		1272	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
		1273	service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WORKLOAD, cfqd)
		1274	->count > 0) {
		1275	if (blk_queue_nonrot(cfqd->queue) \|\| cfqd->hw_tag)
		1276	return;
1245	sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));	1277	sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
		1278	}
1246		1279
1247	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);	1280	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
1248	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);	1281	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
@@ -1346,6 +1379,106 @@ static void cfq_setup_merge(struct cfq_queue cfqq, struct cfq_queue new_cfqq)
1346	}	1379	}
1347	}	1380	}
1348		1381
		1382	static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio,
		1383	bool prio_changed)
		1384	{
		1385	struct cfq_queue *queue;
		1386	int i;
		1387	bool key_valid = false;
		1388	unsigned long lowest_key = 0;
		1389	enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
		1390
		1391	if (prio_changed) {
		1392	/*
		1393	* When priorities switched, we prefer starting
		1394	* from SYNC_NOIDLE (first choice), or just SYNC
		1395	* over ASYNC
		1396	*/
		1397	if (service_tree_for(prio, cur_best, cfqd)->count)
		1398	return cur_best;
		1399	cur_best = SYNC_WORKLOAD;
		1400	if (service_tree_for(prio, cur_best, cfqd)->count)
		1401	return cur_best;
		1402
		1403	return ASYNC_WORKLOAD;
		1404	}
		1405
		1406	for (i = 0; i < 3; ++i) {
		1407	/* otherwise, select the one with lowest rb_key */
		1408	queue = cfq_rb_first(service_tree_for(prio, i, cfqd));
		1409	if (queue &&
		1410	(!key_valid \|\| time_before(queue->rb_key, lowest_key))) {
		1411	lowest_key = queue->rb_key;
		1412	cur_best = i;
		1413	key_valid = true;
		1414	}
		1415	}
		1416
		1417	return cur_best;
		1418	}
		1419
		1420	static void choose_service_tree(struct cfq_data *cfqd)
		1421	{
		1422	enum wl_prio_t previous_prio = cfqd->serving_prio;
		1423	bool prio_changed;
		1424	unsigned slice;
		1425	unsigned count;
		1426
		1427	/* Choose next priority. RT > BE > IDLE */
		1428	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
		1429	cfqd->serving_prio = RT_WORKLOAD;
		1430	else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
		1431	cfqd->serving_prio = BE_WORKLOAD;
		1432	else {
		1433	cfqd->serving_prio = IDLE_WORKLOAD;
		1434	cfqd->workload_expires = jiffies + 1;
		1435	return;
		1436	}
		1437
		1438	/*
		1439	* For RT and BE, we have to choose also the type
		1440	* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
		1441	* expiration time
		1442	*/
		1443	prio_changed = (cfqd->serving_prio != previous_prio);
		1444	count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
		1445	->count;
		1446
		1447	/*
		1448	* If priority didn't change, check workload expiration,
		1449	* and that we still have other queues ready
		1450	*/
		1451	if (!prio_changed && count &&
		1452	!time_after(jiffies, cfqd->workload_expires))
		1453	return;
		1454
		1455	/* otherwise select new workload type */
		1456	cfqd->serving_type =
		1457	cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed);
		1458	count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
		1459	->count;
		1460
		1461	/*
		1462	* the workload slice is computed as a fraction of target latency
		1463	* proportional to the number of queues in that workload, over
		1464	* all the queues in the same priority class
		1465	*/
		1466	slice = cfq_target_latency * count /
		1467	max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
		1468	cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
		1469
		1470	if (cfqd->serving_type == ASYNC_WORKLOAD)
		1471	/* async workload slice is scaled down according to
		1472	* the sync/async slice ratio. */
		1473	slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
		1474	else
		1475	/* sync workload slice is at least 2 * cfq_slice_idle */
		1476	slice = max(slice, 2 * cfqd->cfq_slice_idle);
		1477
		1478	slice = max_t(unsigned, slice, CFQ_MIN_TT);
		1479	cfqd->workload_expires = jiffies + slice;
		1480	}
		1481
1349	/*	1482	/*
1350	* Select a queue for service. If we have a current active queue,	1483	* Select a queue for service. If we have a current active queue,
1351	* check whether to continue servicing it, or retrieve and set a new one.	1484	* check whether to continue servicing it, or retrieve and set a new one.
@@ -1398,14 +1531,13 @@ static struct cfq_queue cfq_select_queue(struct cfq_data cfqd)
1398	expire:	1531	expire:
1399	cfq_slice_expired(cfqd, 0);	1532	cfq_slice_expired(cfqd, 0);
1400	new_queue:	1533	new_queue:
1401	if (!new_cfqq) {	1534	/*
1402	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))	1535	* Current queue expired. Check if we have to switch to a new
1403	cfqd->serving_prio = RT_WORKLOAD;	1536	* service tree
1404	else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))	1537	*/
1405	cfqd->serving_prio = BE_WORKLOAD;	1538	if (!new_cfqq)
1406	else	1539	choose_service_tree(cfqd);
1407	cfqd->serving_prio = IDLE_WORKLOAD;	1540
1408	}
1409	cfqq = cfq_set_active_queue(cfqd, new_cfqq);	1541	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
1410	keep_queue:	1542	keep_queue:
1411	return cfqq;	1543	return cfqq;
@@ -1432,10 +1564,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
1432	{	1564	{
1433	struct cfq_queue *cfqq;	1565	struct cfq_queue *cfqq;
1434	int dispatched = 0;	1566	int dispatched = 0;
1435	int i;	1567	int i, j;
1436	for (i = 0; i < 2; ++i)	1568	for (i = 0; i < 2; ++i)
1437	while ((cfqq = cfq_rb_first(&cfqd->service_trees[i])) != NULL)	1569	for (j = 0; j < 3; ++j)
1438	dispatched += __cfq_forced_dispatch_cfqq(cfqq);	1570	while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j]))
		1571	!= NULL)
		1572	dispatched += __cfq_forced_dispatch_cfqq(cfqq);
1439		1573
1440	while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)	1574	while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)
1441	dispatched += __cfq_forced_dispatch_cfqq(cfqq);	1575	dispatched += __cfq_forced_dispatch_cfqq(cfqq);
@@ -2218,13 +2352,10 @@ cfq_update_idle_window(struct cfq_data cfqd, struct cfq_queue cfqq,
2218	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);	2352	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
2219		2353
2220	if (!atomic_read(&cic->ioc->nr_tasks) \|\| !cfqd->cfq_slice_idle \|\|	2354	if (!atomic_read(&cic->ioc->nr_tasks) \|\| !cfqd->cfq_slice_idle \|\|
2221	(!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq)))	2355	(sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)))
2222	enable_idle = 0;	2356	enable_idle = 0;
2223	else if (sample_valid(cic->ttime_samples)) {	2357	else if (sample_valid(cic->ttime_samples)) {
2224	unsigned int slice_idle = cfqd->cfq_slice_idle;	2358	if (cic->ttime_mean > cfqd->cfq_slice_idle)
2225	if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
2226	slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
2227	if (cic->ttime_mean > slice_idle)
2228	enable_idle = 0;	2359	enable_idle = 0;
2229	else	2360	else
2230	enable_idle = 1;	2361	enable_idle = 1;
@@ -2262,6 +2393,10 @@ cfq_should_preempt(struct cfq_data cfqd, struct cfq_queue new_cfqq,
2262	if (cfq_class_idle(cfqq))	2393	if (cfq_class_idle(cfqq))
2263	return true;	2394	return true;
2264		2395
		2396	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD
		2397	&& new_cfqq->service_tree == cfqq->service_tree)
		2398	return true;
		2399
2265	/*	2400	/*
2266	* if the new request is sync, but the currently running queue is	2401	* if the new request is sync, but the currently running queue is
2267	* not, let the sync request have priority.	2402	* not, let the sync request have priority.
@@ -2778,14 +2913,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
2778	static void cfq_init_queue(struct request_queue q)	2913	static void cfq_init_queue(struct request_queue q)
2779	{	2914	{
2780	struct cfq_data *cfqd;	2915	struct cfq_data *cfqd;
2781	int i;	2916	int i, j;
2782		2917
2783	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL \| __GFP_ZERO, q->node);	2918	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL \| __GFP_ZERO, q->node);
2784	if (!cfqd)	2919	if (!cfqd)
2785	return NULL;	2920	return NULL;
2786		2921
2787	for (i = 0; i < 2; ++i)	2922	for (i = 0; i < 2; ++i)
2788	cfqd->service_trees[i] = CFQ_RB_ROOT;	2923	for (j = 0; j < 3; ++j)
		2924	cfqd->service_trees[i][j] = CFQ_RB_ROOT;
2789	cfqd->service_tree_idle = CFQ_RB_ROOT;	2925	cfqd->service_tree_idle = CFQ_RB_ROOT;
2790		2926
2791	/*	2927	/*