commit 150e6c67f4bf6ab51e62defc41bd19a2eefe5709
tree   586bd5450afc4f142d78017389af260e76c19f0b
parent 4f570f995f68ef77aae7e5a441222f59232f2d0e
parent 5869619cb5b26754574375472fe54a390edf34c7
Author:    Jens Axboe <jens.axboe@oracle.com>  2009-11-03 15:12:10 -0500
Committer: Jens Axboe <jens.axboe@oracle.com>  2009-11-03 15:12:10 -0500

    Merge branch 'cfq-2.6.33' into for-2.6.33
Diffstat (limited to 'block'):
 block/cfq-iosched.c | 373 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 321 insertions(+), 52 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 757010d8fb7a..8b5ba184cf91 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -27,6 +27,8 @@ static const int cfq_slice_sync = HZ / 10;
 static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
 static int cfq_slice_idle = HZ / 125;
+static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static const int cfq_hist_divisor = 4;
 
 /*
  * offset from end of service tree
@@ -73,8 +75,9 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
 struct cfq_rb_root {
 	struct rb_root rb;
 	struct rb_node *left;
+	unsigned count;
 };
-#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
+#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, }
 
 /*
  * Per process-grouping structure
@@ -126,19 +129,48 @@ struct cfq_queue {
 
 	pid_t pid;
 
+	struct cfq_rb_root *service_tree;
 	struct cfq_queue *new_cfqq;
 };
 
 /*
+ * First index in the service_trees.
+ * IDLE is handled separately, so it has negative index
+ */
+enum wl_prio_t {
+	IDLE_WORKLOAD = -1,
+	BE_WORKLOAD = 0,
+	RT_WORKLOAD = 1
+};
+
+/*
+ * Second index in the service_trees.
+ */
+enum wl_type_t {
+	ASYNC_WORKLOAD = 0,
+	SYNC_NOIDLE_WORKLOAD = 1,
+	SYNC_WORKLOAD = 2
+};
+
+
+/*
  * Per block device queue structure
  */
 struct cfq_data {
 	struct request_queue *queue;
 
 	/*
-	 * rr list of queues with requests and the count of them
+	 * rr lists of queues with requests, onle rr for each priority class.
+	 * Counts are embedded in the cfq_rb_root
+	 */
+	struct cfq_rb_root service_trees[2][3];
+	struct cfq_rb_root service_tree_idle;
+	/*
+	 * The priority currently being served
 	 */
-	struct cfq_rb_root service_tree;
+	enum wl_prio_t serving_prio;
+	enum wl_type_t serving_type;
+	unsigned long workload_expires;
 
 	/*
 	 * Each priority tree is sorted by next_request position. These
@@ -148,6 +180,7 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
+	unsigned int busy_queues_avg[2];
 
 	int rq_in_driver[2];
 	int sync_flight;
@@ -199,6 +232,16 @@ struct cfq_data {
 	unsigned long last_end_sync_rq;
 };
 
+static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio,
+					    enum wl_type_t type,
+					    struct cfq_data *cfqd)
+{
+	if (prio == IDLE_WORKLOAD)
+		return &cfqd->service_tree_idle;
+
+	return &cfqd->service_trees[prio][type];
+}
+
 enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
 	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
@@ -243,6 +286,35 @@ CFQ_CFQQ_FNS(coop);
 #define cfq_log(cfqd, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
 
+static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
+{
+	if (cfq_class_idle(cfqq))
+		return IDLE_WORKLOAD;
+	if (cfq_class_rt(cfqq))
+		return RT_WORKLOAD;
+	return BE_WORKLOAD;
+}
+
+
+static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
+{
+	if (!cfq_cfqq_sync(cfqq))
+		return ASYNC_WORKLOAD;
+	if (!cfq_cfqq_idle_window(cfqq))
+		return SYNC_NOIDLE_WORKLOAD;
+	return SYNC_WORKLOAD;
+}
+
+static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
+{
+	if (wl == IDLE_WORKLOAD)
+		return cfqd->service_tree_idle.count;
+
+	return cfqd->service_trees[wl][ASYNC_WORKLOAD].count
+	      + cfqd->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
+	      + cfqd->service_trees[wl][SYNC_WORKLOAD].count;
+}
+
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
 				       struct io_context *, gfp_t);
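
As a rough illustration (not from the patch): the helpers above route every queue
into a 2x3 grid of service trees indexed by priority class and workload type, with
the idle class kept in its own tree. The standalone sketch below models that lookup
with stub types (rb_stub and cfqd_stub are invented names); it is plain userspace C,
not kernel code.

/* Stub model of the [prio][type] service-tree lookup introduced by this merge. */
#include <stdio.h>

enum wl_prio_t { IDLE_WORKLOAD = -1, BE_WORKLOAD = 0, RT_WORKLOAD = 1 };
enum wl_type_t { ASYNC_WORKLOAD = 0, SYNC_NOIDLE_WORKLOAD = 1, SYNC_WORKLOAD = 2 };

struct rb_stub { unsigned count; };		/* stands in for struct cfq_rb_root */

struct cfqd_stub {
	struct rb_stub service_trees[2][3];	/* [BE|RT][ASYNC|SYNC_NOIDLE|SYNC] */
	struct rb_stub service_tree_idle;	/* idle class lives outside the grid */
};

static struct rb_stub *tree_for(struct cfqd_stub *d, enum wl_prio_t p, enum wl_type_t t)
{
	if (p == IDLE_WORKLOAD)
		return &d->service_tree_idle;	/* negative index never touches the array */
	return &d->service_trees[p][t];
}

int main(void)
{
	static struct cfqd_stub d;		/* zero-initialized, like the busy counts at start */

	tree_for(&d, RT_WORKLOAD, SYNC_WORKLOAD)->count++;
	tree_for(&d, BE_WORKLOAD, ASYNC_WORKLOAD)->count++;
	printf("busy RT sync queues: %u\n",
	       d.service_trees[RT_WORKLOAD][SYNC_WORKLOAD].count);
	return 0;
}
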
@@ -315,10 +387,49 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
+/*
+ * get averaged number of queues of RT/BE priority.
+ * average is updated, with a formula that gives more weight to higher numbers,
+ * to quickly follows sudden increases and decrease slowly
+ */
+
+static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt)
+{
+	unsigned min_q, max_q;
+	unsigned mult = cfq_hist_divisor - 1;
+	unsigned round = cfq_hist_divisor / 2;
+	unsigned busy = cfq_busy_queues_wl(rt, cfqd);
+
+	min_q = min(cfqd->busy_queues_avg[rt], busy);
+	max_q = max(cfqd->busy_queues_avg[rt], busy);
+	cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
+		cfq_hist_divisor;
+	return cfqd->busy_queues_avg[rt];
+}
+
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+	if (cfqd->cfq_latency) {
+		/* interested queues (we consider only the ones with the same
+		 * priority class) */
+		unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));
+		unsigned sync_slice = cfqd->cfq_slice[1];
+		unsigned expect_latency = sync_slice * iq;
+		if (expect_latency > cfq_target_latency) {
+			unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
+			/* scale low_slice according to IO priority
+			 * and sync vs async */
+			unsigned low_slice =
+				min(slice, base_low_slice * slice / sync_slice);
+			/* the adapted slice value is scaled to fit all iqs
+			 * into the target latency */
+			slice = max(slice * cfq_target_latency / expect_latency,
+				    low_slice);
+		}
+	}
+	cfqq->slice_end = jiffies + slice;
 	cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
 }
 
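
For reference (not from the patch): with cfq_hist_divisor = 4, the running average
above is updated as avg = (3 * max(avg, busy) + min(avg, busy) + 2) / 4, so it rises
almost immediately when queues appear and decays by roughly a quarter per update when
they leave. The sketch below works the slice scaling through once in plain C, assuming
HZ = 1000 (so jiffies equal milliseconds) and this file's defaults: sync slice 100 ms,
slice_idle 8 ms, target latency 300 ms.

/* One round of the low-latency slice shrinking, with assumed default values. */
#include <stdio.h>

static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }
static unsigned max_u(unsigned a, unsigned b) { return a > b ? a : b; }

int main(void)
{
	unsigned slice = 100;		/* cfq_prio_to_slice() result for this queue */
	unsigned iq = 5;		/* averaged busy queues of the same class */
	unsigned sync_slice = 100, slice_idle = 8, target_latency = 300;

	unsigned expect_latency = sync_slice * iq;		/* 500 ms > 300 ms target */
	if (expect_latency > target_latency) {
		unsigned base_low_slice = 2 * slice_idle;	/* 16 ms lower bound */
		unsigned low_slice = min_u(slice, base_low_slice * slice / sync_slice);

		slice = max_u(slice * target_latency / expect_latency, low_slice);
	}
	printf("adapted slice: %u ms\n", slice);	/* 60 ms: 5 queues fit ~300 ms */
	return 0;
}
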
@@ -457,6 +568,7 @@ static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
 	if (root->left == n)
 		root->left = NULL;
 	rb_erase_init(n, &root->rb);
+	--root->count;
 }
 
 /*
@@ -497,7 +609,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
 }
 
 /*
- * The cfqd->service_tree holds all pending cfq_queue's that have
+ * The cfqd->service_trees holds all pending cfq_queue's that have
  * requests waiting to be processed. It is sorted in the order that
  * we will service the queues.
  */
@@ -507,11 +619,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct rb_node **p, *parent;
 	struct cfq_queue *__cfqq;
 	unsigned long rb_key;
+	struct cfq_rb_root *service_tree;
 	int left;
 
+	service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd);
 	if (cfq_class_idle(cfqq)) {
 		rb_key = CFQ_IDLE_DELAY;
-		parent = rb_last(&cfqd->service_tree.rb);
+		parent = rb_last(&service_tree->rb);
 		if (parent && parent != &cfqq->rb_node) {
 			__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
 			rb_key += __cfqq->rb_key;
@@ -529,7 +643,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqq->slice_resid = 0;
 	} else {
 		rb_key = -HZ;
-		__cfqq = cfq_rb_first(&cfqd->service_tree);
+		__cfqq = cfq_rb_first(service_tree);
 		rb_key += __cfqq ? __cfqq->rb_key : jiffies;
 	}
 
@@ -537,15 +651,18 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * same position, nothing more to do
 		 */
-		if (rb_key == cfqq->rb_key)
+		if (rb_key == cfqq->rb_key &&
+		    cfqq->service_tree == service_tree)
 			return;
 
-		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+		cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
+		cfqq->service_tree = NULL;
 	}
 
 	left = 1;
 	parent = NULL;
-	p = &cfqd->service_tree.rb.rb_node;
+	cfqq->service_tree = service_tree;
+	p = &service_tree->rb.rb_node;
 	while (*p) {
 		struct rb_node **n;
 
@@ -553,35 +670,25 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
 
 		/*
-		 * sort RT queues first, we always want to give
-		 * preference to them. IDLE queues goes to the back.
-		 * after that, sort on the next service time.
+		 * sort by key, that represents service time.
 		 */
-		if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
+		if (time_before(rb_key, __cfqq->rb_key))
 			n = &(*p)->rb_left;
-		else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
-			n = &(*p)->rb_right;
-		else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
-			n = &(*p)->rb_left;
-		else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
-			n = &(*p)->rb_right;
-		else if (time_before(rb_key, __cfqq->rb_key))
-			n = &(*p)->rb_left;
-		else
+		else {
 			n = &(*p)->rb_right;
-
-		if (n == &(*p)->rb_right)
 			left = 0;
+		}
 
 		p = n;
 	}
 
 	if (left)
-		cfqd->service_tree.left = &cfqq->rb_node;
+		service_tree->left = &cfqq->rb_node;
 
 	cfqq->rb_key = rb_key;
 	rb_link_node(&cfqq->rb_node, parent, p);
-	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
+	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
+	service_tree->count++;
 }
 
 static struct cfq_queue *
@@ -683,8 +790,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 	cfq_clear_cfqq_on_rr(cfqq);
 
-	if (!RB_EMPTY_NODE(&cfqq->rb_node))
-		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
+		cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
+		cfqq->service_tree = NULL;
+	}
 	if (cfqq->p_root) {
 		rb_erase(&cfqq->p_node, cfqq->p_root);
 		cfqq->p_root = NULL;
@@ -945,10 +1054,12 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
  */
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
-	if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
-		return NULL;
+	struct cfq_rb_root *service_tree =
+		service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd);
 
-	return cfq_rb_first(&cfqd->service_tree);
+	if (RB_EMPTY_ROOT(&service_tree->rb))
+		return NULL;
+	return cfq_rb_first(service_tree);
 }
 
 /*
@@ -1065,9 +1176,45 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
 	if (CFQQ_SEEKY(cfqq))
 		return NULL;
 
+	/*
+	 * Do not merge queues of different priority classes
+	 */
+	if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
+		return NULL;
+
 	return cfqq;
 }
 
+/*
+ * Determine whether we should enforce idle window for this queue.
+ */
+
+static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	enum wl_prio_t prio = cfqq_prio(cfqq);
+	struct cfq_rb_root *service_tree = cfqq->service_tree;
+
+	/* We never do for idle class queues. */
+	if (prio == IDLE_WORKLOAD)
+		return false;
+
+	/* We do for queues that were marked with idle window flag. */
+	if (cfq_cfqq_idle_window(cfqq))
+		return true;
+
+	/*
+	 * Otherwise, we do only if they are the last ones
+	 * in their service tree.
+	 */
+	if (!service_tree)
+		service_tree = service_tree_for(prio, cfqq_type(cfqq), cfqd);
+
+	if (service_tree->count == 0)
+		return true;
+
+	return (service_tree->count == 1 && cfq_rb_first(service_tree) == cfqq);
+}
+
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = cfqd->active_queue;
@@ -1088,7 +1235,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	/*
 	 * idle is disabled, either manually or by past process history
 	 */
-	if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
+	if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq))
 		return;
 
 	/*
@@ -1115,14 +1262,20 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 
 	cfq_mark_cfqq_wait_request(cfqq);
 
-	/*
-	 * we don't want to idle for seeks, but we do want to allow
-	 * fair distribution of slice time for a process doing back-to-back
-	 * seeks. so allow a little bit of time for him to submit a new rq
-	 */
 	sl = cfqd->cfq_slice_idle;
-	if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
+	/* are we servicing noidle tree, and there are more queues?
+	 * non-rotational or NCQ: no idle
+	 * non-NCQ rotational : very small idle, to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks.
+	 */
+	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
+	    service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WORKLOAD, cfqd)
+		->count > 0) {
+		if (blk_queue_nonrot(cfqd->queue) || cfqd->hw_tag)
+			return;
 		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
+	}
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
@@ -1226,6 +1379,106 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
 	}
 }
 
+static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio,
+				    bool prio_changed)
+{
+	struct cfq_queue *queue;
+	int i;
+	bool key_valid = false;
+	unsigned long lowest_key = 0;
+	enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
+
+	if (prio_changed) {
+		/*
+		 * When priorities switched, we prefer starting
+		 * from SYNC_NOIDLE (first choice), or just SYNC
+		 * over ASYNC
+		 */
+		if (service_tree_for(prio, cur_best, cfqd)->count)
+			return cur_best;
+		cur_best = SYNC_WORKLOAD;
+		if (service_tree_for(prio, cur_best, cfqd)->count)
+			return cur_best;
+
+		return ASYNC_WORKLOAD;
+	}
+
+	for (i = 0; i < 3; ++i) {
+		/* otherwise, select the one with lowest rb_key */
+		queue = cfq_rb_first(service_tree_for(prio, i, cfqd));
+		if (queue &&
+		    (!key_valid || time_before(queue->rb_key, lowest_key))) {
+			lowest_key = queue->rb_key;
+			cur_best = i;
+			key_valid = true;
+		}
+	}
+
+	return cur_best;
+}
+
+static void choose_service_tree(struct cfq_data *cfqd)
+{
+	enum wl_prio_t previous_prio = cfqd->serving_prio;
+	bool prio_changed;
+	unsigned slice;
+	unsigned count;
+
+	/* Choose next priority. RT > BE > IDLE */
+	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+		cfqd->serving_prio = RT_WORKLOAD;
+	else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+		cfqd->serving_prio = BE_WORKLOAD;
+	else {
+		cfqd->serving_prio = IDLE_WORKLOAD;
+		cfqd->workload_expires = jiffies + 1;
+		return;
+	}
+
+	/*
+	 * For RT and BE, we have to choose also the type
+	 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
+	 * expiration time
+	 */
+	prio_changed = (cfqd->serving_prio != previous_prio);
+	count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
+		->count;
+
+	/*
+	 * If priority didn't change, check workload expiration,
+	 * and that we still have other queues ready
+	 */
+	if (!prio_changed && count &&
+	    !time_after(jiffies, cfqd->workload_expires))
+		return;
+
+	/* otherwise select new workload type */
+	cfqd->serving_type =
+		cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed);
+	count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
+		->count;
+
+	/*
+	 * the workload slice is computed as a fraction of target latency
+	 * proportional to the number of queues in that workload, over
+	 * all the queues in the same priority class
+	 */
+	slice = cfq_target_latency * count /
+		max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
+		      cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+
+	if (cfqd->serving_type == ASYNC_WORKLOAD)
+		/* async workload slice is scaled down according to
+		 * the sync/async slice ratio. */
+		slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
+	else
+		/* sync workload slice is at least 2 * cfq_slice_idle */
+		slice = max(slice, 2 * cfqd->cfq_slice_idle);
+
+	slice = max_t(unsigned, slice, CFQ_MIN_TT);
+	cfqd->workload_expires = jiffies + slice;
+}
+
 /*
  * Select a queue for service. If we have a current active queue,
  * check whether to continue servicing it, or retrieve and set a new one.
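
For reference (not from the patch): a worked instance of the workload-slice formula
above, assuming HZ = 1000 and this file's defaults (target latency 300 ms, sync slice
100 ms, async slice 40 ms, slice_idle 8 ms), and taking the averaged and instantaneous
busy counts of the serving class as equal.

/* Workload slice for 1 queue of the chosen type out of 4 busy queues in its class. */
#include <stdio.h>

int main(void)
{
	unsigned target_latency = 300, slice_sync = 100, slice_async = 40, slice_idle = 8;
	unsigned busy_in_class = 4;	/* max(busy_queues_avg, cfq_busy_queues_wl) */
	unsigned count = 1;		/* queues on the chosen workload's service tree */

	unsigned slice = target_latency * count / busy_in_class;	/* 75 ms */
	unsigned async_wl = slice * slice_async / slice_sync;		/* scaled to 30 ms */
	unsigned sync_wl = slice > 2 * slice_idle ? slice : 2 * slice_idle; /* >= 16 ms */

	printf("sync workload: %u ms, async workload: %u ms\n", sync_wl, async_wl);
	return 0;
}
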
@@ -1270,7 +1523,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 	 * conditions to happen (or time out) before selecting a new queue.
 	 */
 	if (timer_pending(&cfqd->idle_slice_timer) ||
-	    (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
+	    (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) {
 		cfqq = NULL;
 		goto keep_queue;
 	}
@@ -1278,6 +1531,13 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 expire:
 	cfq_slice_expired(cfqd, 0);
 new_queue:
+	/*
+	 * Current queue expired. Check if we have to switch to a new
+	 * service tree
+	 */
+	if (!new_cfqq)
+		choose_service_tree(cfqd);
+
 	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
 keep_queue:
 	return cfqq;
@@ -1304,8 +1564,14 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq;
 	int dispatched = 0;
-
-	while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL)
+	int i, j;
+	for (i = 0; i < 2; ++i)
+		for (j = 0; j < 3; ++j)
+			while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j]))
+				!= NULL)
+				dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+
+	while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)
 		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
 
 	cfq_slice_expired(cfqd, 0);
@@ -1323,7 +1589,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	/*
 	 * Drain async requests before we start sync IO
 	 */
-	if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+	if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
 		return false;
 
 	/*
@@ -2086,13 +2352,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
 
 	if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq)))
+	    (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
-		unsigned int slice_idle = cfqd->cfq_slice_idle;
-		if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
-			slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
-		if (cic->ttime_mean > slice_idle)
+		if (cic->ttime_mean > cfqd->cfq_slice_idle)
 			enable_idle = 0;
 		else
 			enable_idle = 1;
@@ -2130,6 +2393,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (cfq_class_idle(cfqq))
 		return true;
 
+	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD
+	    && new_cfqq->service_tree == cfqq->service_tree)
+		return true;
+
 	/*
 	 * if the new request is sync, but the currently running queue is
 	 * not, let the sync request have priority.
@@ -2243,10 +2510,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
 	cfq_log_cfqq(cfqd, cfqq, "insert_request");
 	cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
 
-	cfq_add_rq_rb(rq);
-
 	rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
+	cfq_add_rq_rb(rq);
 
 	cfq_rq_enqueued(cfqd, cfqq, rq);
 }
@@ -2645,13 +2911,16 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
-	int i;
+	int i, j;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!cfqd)
 		return NULL;
 
-	cfqd->service_tree = CFQ_RB_ROOT;
+	for (i = 0; i < 2; ++i)
+		for (j = 0; j < 3; ++j)
+			cfqd->service_trees[i][j] = CFQ_RB_ROOT;
+	cfqd->service_tree_idle = CFQ_RB_ROOT;
 
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we