author    | Jens Axboe <jens.axboe@oracle.com> | 2009-11-03 15:12:10 -0500
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-11-03 15:12:10 -0500
commit    | 150e6c67f4bf6ab51e62defc41bd19a2eefe5709 (patch)
tree      | 586bd5450afc4f142d78017389af260e76c19f0b /block/cfq-iosched.c
parent    | 4f570f995f68ef77aae7e5a441222f59232f2d0e (diff)
parent    | 5869619cb5b26754574375472fe54a390edf34c7 (diff)
Merge branch 'cfq-2.6.33' into for-2.6.33
Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r-- | block/cfq-iosched.c | 373
1 file changed, 321 insertions(+), 52 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 757010d8fb7a..8b5ba184cf91 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -27,6 +27,8 @@ static const int cfq_slice_sync = HZ / 10; | |||
27 | static int cfq_slice_async = HZ / 25; | 27 | static int cfq_slice_async = HZ / 25; |
28 | static const int cfq_slice_async_rq = 2; | 28 | static const int cfq_slice_async_rq = 2; |
29 | static int cfq_slice_idle = HZ / 125; | 29 | static int cfq_slice_idle = HZ / 125; |
30 | static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ | ||
31 | static const int cfq_hist_divisor = 4; | ||
30 | 32 | ||
31 | /* | 33 | /* |
32 | * offset from end of service tree | 34 | * offset from end of service tree |
@@ -73,8 +75,9 @@ static DEFINE_SPINLOCK(ioc_gone_lock); | |||
73 | struct cfq_rb_root { | 75 | struct cfq_rb_root { |
74 | struct rb_root rb; | 76 | struct rb_root rb; |
75 | struct rb_node *left; | 77 | struct rb_node *left; |
78 | unsigned count; | ||
76 | }; | 79 | }; |
77 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } | 80 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, } |
78 | 81 | ||
79 | /* | 82 | /* |
80 | * Per process-grouping structure | 83 | * Per process-grouping structure |
@@ -126,19 +129,48 @@ struct cfq_queue { | |||
126 | 129 | ||
127 | pid_t pid; | 130 | pid_t pid; |
128 | 131 | ||
132 | struct cfq_rb_root *service_tree; | ||
129 | struct cfq_queue *new_cfqq; | 133 | struct cfq_queue *new_cfqq; |
130 | }; | 134 | }; |
131 | 135 | ||
132 | /* | 136 | /* |
137 | * First index in the service_trees. | ||
138 | * IDLE is handled separately, so it has negative index | ||
139 | */ | ||
140 | enum wl_prio_t { | ||
141 | IDLE_WORKLOAD = -1, | ||
142 | BE_WORKLOAD = 0, | ||
143 | RT_WORKLOAD = 1 | ||
144 | }; | ||
145 | |||
146 | /* | ||
147 | * Second index in the service_trees. | ||
148 | */ | ||
149 | enum wl_type_t { | ||
150 | ASYNC_WORKLOAD = 0, | ||
151 | SYNC_NOIDLE_WORKLOAD = 1, | ||
152 | SYNC_WORKLOAD = 2 | ||
153 | }; | ||
154 | |||
155 | |||
156 | /* | ||
133 | * Per block device queue structure | 157 | * Per block device queue structure |
134 | */ | 158 | */ |
135 | struct cfq_data { | 159 | struct cfq_data { |
136 | struct request_queue *queue; | 160 | struct request_queue *queue; |
137 | 161 | ||
138 | /* | 162 | /* |
139 | * rr list of queues with requests and the count of them | 163 | * rr lists of queues with requests, one rr for each priority class. |
164 | * Counts are embedded in the cfq_rb_root | ||
165 | */ | ||
166 | struct cfq_rb_root service_trees[2][3]; | ||
167 | struct cfq_rb_root service_tree_idle; | ||
168 | /* | ||
169 | * The priority currently being served | ||
140 | */ | 170 | */ |
141 | struct cfq_rb_root service_tree; | 171 | enum wl_prio_t serving_prio; |
172 | enum wl_type_t serving_type; | ||
173 | unsigned long workload_expires; | ||
142 | 174 | ||
143 | /* | 175 | /* |
144 | * Each priority tree is sorted by next_request position. These | 176 | * Each priority tree is sorted by next_request position. These |
@@ -148,6 +180,7 @@ struct cfq_data { | |||
148 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; | 180 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; |
149 | 181 | ||
150 | unsigned int busy_queues; | 182 | unsigned int busy_queues; |
183 | unsigned int busy_queues_avg[2]; | ||
151 | 184 | ||
152 | int rq_in_driver[2]; | 185 | int rq_in_driver[2]; |
153 | int sync_flight; | 186 | int sync_flight; |
@@ -199,6 +232,16 @@ struct cfq_data { | |||
199 | unsigned long last_end_sync_rq; | 232 | unsigned long last_end_sync_rq; |
200 | }; | 233 | }; |
201 | 234 | ||
235 | static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio, | ||
236 | enum wl_type_t type, | ||
237 | struct cfq_data *cfqd) | ||
238 | { | ||
239 | if (prio == IDLE_WORKLOAD) | ||
240 | return &cfqd->service_tree_idle; | ||
241 | |||
242 | return &cfqd->service_trees[prio][type]; | ||
243 | } | ||
244 | |||
202 | enum cfqq_state_flags { | 245 | enum cfqq_state_flags { |
203 | CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ | 246 | CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ |
204 | CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ | 247 | CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ |
@@ -243,6 +286,35 @@ CFQ_CFQQ_FNS(coop); | |||
243 | #define cfq_log(cfqd, fmt, args...) \ | 286 | #define cfq_log(cfqd, fmt, args...) \ |
244 | blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) | 287 | blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) |
245 | 288 | ||
289 | static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) | ||
290 | { | ||
291 | if (cfq_class_idle(cfqq)) | ||
292 | return IDLE_WORKLOAD; | ||
293 | if (cfq_class_rt(cfqq)) | ||
294 | return RT_WORKLOAD; | ||
295 | return BE_WORKLOAD; | ||
296 | } | ||
297 | |||
298 | |||
299 | static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) | ||
300 | { | ||
301 | if (!cfq_cfqq_sync(cfqq)) | ||
302 | return ASYNC_WORKLOAD; | ||
303 | if (!cfq_cfqq_idle_window(cfqq)) | ||
304 | return SYNC_NOIDLE_WORKLOAD; | ||
305 | return SYNC_WORKLOAD; | ||
306 | } | ||
307 | |||
308 | static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd) | ||
309 | { | ||
310 | if (wl == IDLE_WORKLOAD) | ||
311 | return cfqd->service_tree_idle.count; | ||
312 | |||
313 | return cfqd->service_trees[wl][ASYNC_WORKLOAD].count | ||
314 | + cfqd->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count | ||
315 | + cfqd->service_trees[wl][SYNC_WORKLOAD].count; | ||
316 | } | ||
317 | |||
246 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 318 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
247 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, | 319 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, |
248 | struct io_context *, gfp_t); | 320 | struct io_context *, gfp_t); |
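
The hunks above replace the single service_tree with a two-dimensional service_trees[2][3] array indexed by priority class (BE/RT) and workload type (ASYNC/SYNC_NOIDLE/SYNC), with the idle class kept on its own tree. A minimal user-space sketch of that lookup follows; the stand-in struct names are invented for illustration and are not the kernel's.

```c
/*
 * Minimal sketch of the two-level service tree lookup; struct names
 * here are invented for illustration and are not the kernel's.
 */
#include <stdio.h>

enum wl_prio_t { IDLE_WORKLOAD = -1, BE_WORKLOAD = 0, RT_WORKLOAD = 1 };
enum wl_type_t { ASYNC_WORKLOAD = 0, SYNC_NOIDLE_WORKLOAD = 1, SYNC_WORKLOAD = 2 };

struct tree_sketch { unsigned count; };		/* stand-in for struct cfq_rb_root */

struct cfqd_sketch {
	struct tree_sketch service_trees[2][3];	/* [BE/RT][ASYNC/NOIDLE/SYNC] */
	struct tree_sketch service_tree_idle;	/* idle class lives outside the array */
};

/* same shape as service_tree_for() above: idle short-circuits,
 * everything else indexes the 2x3 array */
static struct tree_sketch *tree_for(struct cfqd_sketch *d,
				    enum wl_prio_t prio, enum wl_type_t type)
{
	if (prio == IDLE_WORKLOAD)
		return &d->service_tree_idle;
	return &d->service_trees[prio][type];
}

int main(void)
{
	struct cfqd_sketch d = {
		.service_trees = { { {2}, {1}, {3} }, { {0}, {1}, {0} } },
		.service_tree_idle = {1},
	};

	/* a best-effort sync queue lands in [BE_WORKLOAD][SYNC_WORKLOAD] */
	printf("BE sync count: %u\n", tree_for(&d, BE_WORKLOAD, SYNC_WORKLOAD)->count);
	/* an idle-class queue never touches the array */
	printf("idle count:    %u\n", tree_for(&d, IDLE_WORKLOAD, ASYNC_WORKLOAD)->count);
	return 0;
}
```

IDLE_WORKLOAD is -1 precisely so it can never index the array; as the comment in the patch notes, idle is handled separately and service_tree_for() short-circuits it to the dedicated idle tree.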
@@ -315,10 +387,49 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
315 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); | 387 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); |
316 | } | 388 | } |
317 | 389 | ||
390 | /* | ||
391 | * get averaged number of queues of RT/BE priority. | ||
392 | * average is updated, with a formula that gives more weight to higher numbers, | ||
393 | * to quickly follow sudden increases and decrease slowly | ||
394 | */ | ||
395 | |||
396 | static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt) | ||
397 | { | ||
398 | unsigned min_q, max_q; | ||
399 | unsigned mult = cfq_hist_divisor - 1; | ||
400 | unsigned round = cfq_hist_divisor / 2; | ||
401 | unsigned busy = cfq_busy_queues_wl(rt, cfqd); | ||
402 | |||
403 | min_q = min(cfqd->busy_queues_avg[rt], busy); | ||
404 | max_q = max(cfqd->busy_queues_avg[rt], busy); | ||
405 | cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) / | ||
406 | cfq_hist_divisor; | ||
407 | return cfqd->busy_queues_avg[rt]; | ||
408 | } | ||
409 | |||
318 | static inline void | 410 | static inline void |
319 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 411 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
320 | { | 412 | { |
321 | cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; | 413 | unsigned slice = cfq_prio_to_slice(cfqd, cfqq); |
414 | if (cfqd->cfq_latency) { | ||
415 | /* interested queues (we consider only the ones with the same | ||
416 | * priority class) */ | ||
417 | unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq)); | ||
418 | unsigned sync_slice = cfqd->cfq_slice[1]; | ||
419 | unsigned expect_latency = sync_slice * iq; | ||
420 | if (expect_latency > cfq_target_latency) { | ||
421 | unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; | ||
422 | /* scale low_slice according to IO priority | ||
423 | * and sync vs async */ | ||
424 | unsigned low_slice = | ||
425 | min(slice, base_low_slice * slice / sync_slice); | ||
426 | /* the adapted slice value is scaled to fit all iqs | ||
427 | * into the target latency */ | ||
428 | slice = max(slice * cfq_target_latency / expect_latency, | ||
429 | low_slice); | ||
430 | } | ||
431 | } | ||
432 | cfqq->slice_end = jiffies + slice; | ||
322 | cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); | 433 | cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); |
323 | } | 434 | } |
324 | 435 | ||
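
To see what the new cfq_get_avg_queues()/cfq_set_prio_slice() path does with concrete numbers, here is a standalone arithmetic sketch. It assumes HZ = 1000 so the defaults at the top of the file work out to a 100 ms sync slice, 8 ms cfq_slice_idle and a 300 ms target latency, and that cfqd->cfq_slice[1] carries the cfq_slice_sync default; the helper name is illustrative, not the kernel's.

```c
/*
 * Standalone arithmetic for the hunk above, assuming HZ = 1000:
 * cfq_hist_divisor = 4, cfq_target_latency = 300 ms,
 * sync slice (cfqd->cfq_slice[1]) = 100 ms, cfq_slice_idle = 8 ms.
 * update_avg() mirrors cfq_get_avg_queues(); names are illustrative.
 */
#include <stdio.h>

#define HIST_DIVISOR	4
#define TARGET_LATENCY	300	/* ms */
#define SYNC_SLICE	100	/* ms */
#define SLICE_IDLE	8	/* ms */

/* weighted toward the larger value, so the average rises fast and falls slowly */
static unsigned update_avg(unsigned avg, unsigned busy)
{
	unsigned lo = avg < busy ? avg : busy;
	unsigned hi = avg < busy ? busy : avg;

	return ((HIST_DIVISOR - 1) * hi + lo + HIST_DIVISOR / 2) / HIST_DIVISOR;
}

int main(void)
{
	unsigned slice = SYNC_SLICE;			/* base slice for a sync queue */
	unsigned iq = update_avg(1, 5);			/* (3*5 + 1 + 2) / 4 = 4 */
	unsigned expect_latency = SYNC_SLICE * iq;	/* 400 ms > 300 ms target */

	if (expect_latency > TARGET_LATENCY) {
		unsigned base_low = 2 * SLICE_IDLE;		/* 16 ms */
		unsigned low = base_low * slice / SYNC_SLICE;	/* 16 ms here */

		if (low > slice)
			low = slice;
		slice = slice * TARGET_LATENCY / expect_latency; /* 100*300/400 = 75 */
		if (slice < low)
			slice = low;
	}
	printf("avg queues = %u, scaled slice = %u ms\n", iq, slice); /* 4, 75 */
	return 0;
}
```

So when a burst of queues arrives, the average jumps up quickly and each sync slice is shrunk (here from 100 ms to 75 ms) so that one full round still fits inside the target latency, never dropping below the 2 * cfq_slice_idle floor.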
@@ -457,6 +568,7 @@ static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) | |||
457 | if (root->left == n) | 568 | if (root->left == n) |
458 | root->left = NULL; | 569 | root->left = NULL; |
459 | rb_erase_init(n, &root->rb); | 570 | rb_erase_init(n, &root->rb); |
571 | --root->count; | ||
460 | } | 572 | } |
461 | 573 | ||
462 | /* | 574 | /* |
@@ -497,7 +609,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
497 | } | 609 | } |
498 | 610 | ||
499 | /* | 611 | /* |
500 | * The cfqd->service_tree holds all pending cfq_queue's that have | 612 | * The cfqd->service_trees holds all pending cfq_queue's that have |
501 | * requests waiting to be processed. It is sorted in the order that | 613 | * requests waiting to be processed. It is sorted in the order that |
502 | * we will service the queues. | 614 | * we will service the queues. |
503 | */ | 615 | */ |
@@ -507,11 +619,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
507 | struct rb_node **p, *parent; | 619 | struct rb_node **p, *parent; |
508 | struct cfq_queue *__cfqq; | 620 | struct cfq_queue *__cfqq; |
509 | unsigned long rb_key; | 621 | unsigned long rb_key; |
622 | struct cfq_rb_root *service_tree; | ||
510 | int left; | 623 | int left; |
511 | 624 | ||
625 | service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd); | ||
512 | if (cfq_class_idle(cfqq)) { | 626 | if (cfq_class_idle(cfqq)) { |
513 | rb_key = CFQ_IDLE_DELAY; | 627 | rb_key = CFQ_IDLE_DELAY; |
514 | parent = rb_last(&cfqd->service_tree.rb); | 628 | parent = rb_last(&service_tree->rb); |
515 | if (parent && parent != &cfqq->rb_node) { | 629 | if (parent && parent != &cfqq->rb_node) { |
516 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 630 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
517 | rb_key += __cfqq->rb_key; | 631 | rb_key += __cfqq->rb_key; |
@@ -529,7 +643,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
529 | cfqq->slice_resid = 0; | 643 | cfqq->slice_resid = 0; |
530 | } else { | 644 | } else { |
531 | rb_key = -HZ; | 645 | rb_key = -HZ; |
532 | __cfqq = cfq_rb_first(&cfqd->service_tree); | 646 | __cfqq = cfq_rb_first(service_tree); |
533 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; | 647 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; |
534 | } | 648 | } |
535 | 649 | ||
@@ -537,15 +651,18 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
537 | /* | 651 | /* |
538 | * same position, nothing more to do | 652 | * same position, nothing more to do |
539 | */ | 653 | */ |
540 | if (rb_key == cfqq->rb_key) | 654 | if (rb_key == cfqq->rb_key && |
655 | cfqq->service_tree == service_tree) | ||
541 | return; | 656 | return; |
542 | 657 | ||
543 | cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); | 658 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); |
659 | cfqq->service_tree = NULL; | ||
544 | } | 660 | } |
545 | 661 | ||
546 | left = 1; | 662 | left = 1; |
547 | parent = NULL; | 663 | parent = NULL; |
548 | p = &cfqd->service_tree.rb.rb_node; | 664 | cfqq->service_tree = service_tree; |
665 | p = &service_tree->rb.rb_node; | ||
549 | while (*p) { | 666 | while (*p) { |
550 | struct rb_node **n; | 667 | struct rb_node **n; |
551 | 668 | ||
@@ -553,35 +670,25 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
553 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 670 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
554 | 671 | ||
555 | /* | 672 | /* |
556 | * sort RT queues first, we always want to give | 673 | * sort by key, that represents service time. |
557 | * preference to them. IDLE queues goes to the back. | ||
558 | * after that, sort on the next service time. | ||
559 | */ | 674 | */ |
560 | if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq)) | 675 | if (time_before(rb_key, __cfqq->rb_key)) |
561 | n = &(*p)->rb_left; | 676 | n = &(*p)->rb_left; |
562 | else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq)) | 677 | else { |
563 | n = &(*p)->rb_right; | ||
564 | else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq)) | ||
565 | n = &(*p)->rb_left; | ||
566 | else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) | ||
567 | n = &(*p)->rb_right; | ||
568 | else if (time_before(rb_key, __cfqq->rb_key)) | ||
569 | n = &(*p)->rb_left; | ||
570 | else | ||
571 | n = &(*p)->rb_right; | 678 | n = &(*p)->rb_right; |
572 | |||
573 | if (n == &(*p)->rb_right) | ||
574 | left = 0; | 679 | left = 0; |
680 | } | ||
575 | 681 | ||
576 | p = n; | 682 | p = n; |
577 | } | 683 | } |
578 | 684 | ||
579 | if (left) | 685 | if (left) |
580 | cfqd->service_tree.left = &cfqq->rb_node; | 686 | service_tree->left = &cfqq->rb_node; |
581 | 687 | ||
582 | cfqq->rb_key = rb_key; | 688 | cfqq->rb_key = rb_key; |
583 | rb_link_node(&cfqq->rb_node, parent, p); | 689 | rb_link_node(&cfqq->rb_node, parent, p); |
584 | rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); | 690 | rb_insert_color(&cfqq->rb_node, &service_tree->rb); |
691 | service_tree->count++; | ||
585 | } | 692 | } |
586 | 693 | ||
587 | static struct cfq_queue * | 694 | static struct cfq_queue * |
@@ -683,8 +790,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
683 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 790 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
684 | cfq_clear_cfqq_on_rr(cfqq); | 791 | cfq_clear_cfqq_on_rr(cfqq); |
685 | 792 | ||
686 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) | 793 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { |
687 | cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); | 794 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); |
795 | cfqq->service_tree = NULL; | ||
796 | } | ||
688 | if (cfqq->p_root) { | 797 | if (cfqq->p_root) { |
689 | rb_erase(&cfqq->p_node, cfqq->p_root); | 798 | rb_erase(&cfqq->p_node, cfqq->p_root); |
690 | cfqq->p_root = NULL; | 799 | cfqq->p_root = NULL; |
@@ -945,10 +1054,12 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) | |||
945 | */ | 1054 | */ |
946 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 1055 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
947 | { | 1056 | { |
948 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) | 1057 | struct cfq_rb_root *service_tree = |
949 | return NULL; | 1058 | service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd); |
950 | 1059 | ||
951 | return cfq_rb_first(&cfqd->service_tree); | 1060 | if (RB_EMPTY_ROOT(&service_tree->rb)) |
1061 | return NULL; | ||
1062 | return cfq_rb_first(service_tree); | ||
952 | } | 1063 | } |
953 | 1064 | ||
954 | /* | 1065 | /* |
@@ -1065,9 +1176,45 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, | |||
1065 | if (CFQQ_SEEKY(cfqq)) | 1176 | if (CFQQ_SEEKY(cfqq)) |
1066 | return NULL; | 1177 | return NULL; |
1067 | 1178 | ||
1179 | /* | ||
1180 | * Do not merge queues of different priority classes | ||
1181 | */ | ||
1182 | if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq)) | ||
1183 | return NULL; | ||
1184 | |||
1068 | return cfqq; | 1185 | return cfqq; |
1069 | } | 1186 | } |
1070 | 1187 | ||
1188 | /* | ||
1189 | * Determine whether we should enforce idle window for this queue. | ||
1190 | */ | ||
1191 | |||
1192 | static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
1193 | { | ||
1194 | enum wl_prio_t prio = cfqq_prio(cfqq); | ||
1195 | struct cfq_rb_root *service_tree = cfqq->service_tree; | ||
1196 | |||
1197 | /* We never do for idle class queues. */ | ||
1198 | if (prio == IDLE_WORKLOAD) | ||
1199 | return false; | ||
1200 | |||
1201 | /* We do for queues that were marked with idle window flag. */ | ||
1202 | if (cfq_cfqq_idle_window(cfqq)) | ||
1203 | return true; | ||
1204 | |||
1205 | /* | ||
1206 | * Otherwise, we do only if they are the last ones | ||
1207 | * in their service tree. | ||
1208 | */ | ||
1209 | if (!service_tree) | ||
1210 | service_tree = service_tree_for(prio, cfqq_type(cfqq), cfqd); | ||
1211 | |||
1212 | if (service_tree->count == 0) | ||
1213 | return true; | ||
1214 | |||
1215 | return (service_tree->count == 1 && cfq_rb_first(service_tree) == cfqq); | ||
1216 | } | ||
1217 | |||
1071 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) | 1218 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
1072 | { | 1219 | { |
1073 | struct cfq_queue *cfqq = cfqd->active_queue; | 1220 | struct cfq_queue *cfqq = cfqd->active_queue; |
@@ -1088,7 +1235,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
1088 | /* | 1235 | /* |
1089 | * idle is disabled, either manually or by past process history | 1236 | * idle is disabled, either manually or by past process history |
1090 | */ | 1237 | */ |
1091 | if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq)) | 1238 | if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq)) |
1092 | return; | 1239 | return; |
1093 | 1240 | ||
1094 | /* | 1241 | /* |
@@ -1115,14 +1262,20 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
1115 | 1262 | ||
1116 | cfq_mark_cfqq_wait_request(cfqq); | 1263 | cfq_mark_cfqq_wait_request(cfqq); |
1117 | 1264 | ||
1118 | /* | ||
1119 | * we don't want to idle for seeks, but we do want to allow | ||
1120 | * fair distribution of slice time for a process doing back-to-back | ||
1121 | * seeks. so allow a little bit of time for him to submit a new rq | ||
1122 | */ | ||
1123 | sl = cfqd->cfq_slice_idle; | 1265 | sl = cfqd->cfq_slice_idle; |
1124 | if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)) | 1266 | /* are we servicing noidle tree, and there are more queues? |
1267 | * non-rotational or NCQ: no idle | ||
1268 | * non-NCQ rotational : very small idle, to allow | ||
1269 | * fair distribution of slice time for a process doing back-to-back | ||
1270 | * seeks. | ||
1271 | */ | ||
1272 | if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && | ||
1273 | service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WORKLOAD, cfqd) | ||
1274 | ->count > 0) { | ||
1275 | if (blk_queue_nonrot(cfqd->queue) || cfqd->hw_tag) | ||
1276 | return; | ||
1125 | sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); | 1277 | sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); |
1278 | } | ||
1126 | 1279 | ||
1127 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); | 1280 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); |
1128 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); | 1281 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); |
@@ -1226,6 +1379,106 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) | |||
1226 | } | 1379 | } |
1227 | } | 1380 | } |
1228 | 1381 | ||
1382 | static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio, | ||
1383 | bool prio_changed) | ||
1384 | { | ||
1385 | struct cfq_queue *queue; | ||
1386 | int i; | ||
1387 | bool key_valid = false; | ||
1388 | unsigned long lowest_key = 0; | ||
1389 | enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; | ||
1390 | |||
1391 | if (prio_changed) { | ||
1392 | /* | ||
1393 | * When priorities switched, we prefer starting | ||
1394 | * from SYNC_NOIDLE (first choice), or just SYNC | ||
1395 | * over ASYNC | ||
1396 | */ | ||
1397 | if (service_tree_for(prio, cur_best, cfqd)->count) | ||
1398 | return cur_best; | ||
1399 | cur_best = SYNC_WORKLOAD; | ||
1400 | if (service_tree_for(prio, cur_best, cfqd)->count) | ||
1401 | return cur_best; | ||
1402 | |||
1403 | return ASYNC_WORKLOAD; | ||
1404 | } | ||
1405 | |||
1406 | for (i = 0; i < 3; ++i) { | ||
1407 | /* otherwise, select the one with lowest rb_key */ | ||
1408 | queue = cfq_rb_first(service_tree_for(prio, i, cfqd)); | ||
1409 | if (queue && | ||
1410 | (!key_valid || time_before(queue->rb_key, lowest_key))) { | ||
1411 | lowest_key = queue->rb_key; | ||
1412 | cur_best = i; | ||
1413 | key_valid = true; | ||
1414 | } | ||
1415 | } | ||
1416 | |||
1417 | return cur_best; | ||
1418 | } | ||
1419 | |||
1420 | static void choose_service_tree(struct cfq_data *cfqd) | ||
1421 | { | ||
1422 | enum wl_prio_t previous_prio = cfqd->serving_prio; | ||
1423 | bool prio_changed; | ||
1424 | unsigned slice; | ||
1425 | unsigned count; | ||
1426 | |||
1427 | /* Choose next priority. RT > BE > IDLE */ | ||
1428 | if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd)) | ||
1429 | cfqd->serving_prio = RT_WORKLOAD; | ||
1430 | else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd)) | ||
1431 | cfqd->serving_prio = BE_WORKLOAD; | ||
1432 | else { | ||
1433 | cfqd->serving_prio = IDLE_WORKLOAD; | ||
1434 | cfqd->workload_expires = jiffies + 1; | ||
1435 | return; | ||
1436 | } | ||
1437 | |||
1438 | /* | ||
1439 | * For RT and BE, we have to choose also the type | ||
1440 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload | ||
1441 | * expiration time | ||
1442 | */ | ||
1443 | prio_changed = (cfqd->serving_prio != previous_prio); | ||
1444 | count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd) | ||
1445 | ->count; | ||
1446 | |||
1447 | /* | ||
1448 | * If priority didn't change, check workload expiration, | ||
1449 | * and that we still have other queues ready | ||
1450 | */ | ||
1451 | if (!prio_changed && count && | ||
1452 | !time_after(jiffies, cfqd->workload_expires)) | ||
1453 | return; | ||
1454 | |||
1455 | /* otherwise select new workload type */ | ||
1456 | cfqd->serving_type = | ||
1457 | cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed); | ||
1458 | count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd) | ||
1459 | ->count; | ||
1460 | |||
1461 | /* | ||
1462 | * the workload slice is computed as a fraction of target latency | ||
1463 | * proportional to the number of queues in that workload, over | ||
1464 | * all the queues in the same priority class | ||
1465 | */ | ||
1466 | slice = cfq_target_latency * count / | ||
1467 | max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio], | ||
1468 | cfq_busy_queues_wl(cfqd->serving_prio, cfqd)); | ||
1469 | |||
1470 | if (cfqd->serving_type == ASYNC_WORKLOAD) | ||
1471 | /* async workload slice is scaled down according to | ||
1472 | * the sync/async slice ratio. */ | ||
1473 | slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; | ||
1474 | else | ||
1475 | /* sync workload slice is at least 2 * cfq_slice_idle */ | ||
1476 | slice = max(slice, 2 * cfqd->cfq_slice_idle); | ||
1477 | |||
1478 | slice = max_t(unsigned, slice, CFQ_MIN_TT); | ||
1479 | cfqd->workload_expires = jiffies + slice; | ||
1480 | } | ||
1481 | |||
1229 | /* | 1482 | /* |
1230 | * Select a queue for service. If we have a current active queue, | 1483 | * Select a queue for service. If we have a current active queue, |
1231 | * check whether to continue servicing it, or retrieve and set a new one. | 1484 | * check whether to continue servicing it, or retrieve and set a new one. |
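
A worked example of the workload-slice formula in choose_service_tree(), again assuming HZ = 1000 (target latency 300 ms, async/sync slice defaults 40/100 ms, cfq_slice_idle 8 ms) and a best-effort class whose busy-queue average equals its current count of 6; purely illustrative, not kernel code.

```c
/*
 * Worked numbers for the workload-slice formula above, assuming HZ = 1000
 * (cfq_target_latency = 300 ms, async/sync slice defaults 40/100 ms,
 * cfq_slice_idle = 8 ms). Six busy best-effort queues, with the class
 * average also at 6.
 */
#include <stdio.h>

int main(void)
{
	unsigned target = 300, busy_be = 6;
	unsigned slice;

	/* serving the SYNC_NOIDLE tree, which holds 2 of the 6 queues */
	slice = target * 2 / busy_be;		/* 300 * 2 / 6 = 100 ms */
	if (slice < 2 * 8)			/* sync floor: 2 * cfq_slice_idle */
		slice = 2 * 8;
	printf("sync_noidle workload slice: %u ms\n", slice);	/* 100 */

	/* serving the ASYNC tree, which holds 3 of the 6 queues */
	slice = target * 3 / busy_be;		/* 150 ms before scaling */
	slice = slice * 40 / 100;		/* async/sync ratio -> 60 ms */
	printf("async workload slice:       %u ms\n", slice);	/* 60 */
	return 0;
}
```

Either way the result is clamped to at least CFQ_MIN_TT before workload_expires is set, so a workload with very few queues still gets a non-trivial window.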
@@ -1270,7 +1523,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
1270 | * conditions to happen (or time out) before selecting a new queue. | 1523 | * conditions to happen (or time out) before selecting a new queue. |
1271 | */ | 1524 | */ |
1272 | if (timer_pending(&cfqd->idle_slice_timer) || | 1525 | if (timer_pending(&cfqd->idle_slice_timer) || |
1273 | (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) { | 1526 | (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) { |
1274 | cfqq = NULL; | 1527 | cfqq = NULL; |
1275 | goto keep_queue; | 1528 | goto keep_queue; |
1276 | } | 1529 | } |
@@ -1278,6 +1531,13 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
1278 | expire: | 1531 | expire: |
1279 | cfq_slice_expired(cfqd, 0); | 1532 | cfq_slice_expired(cfqd, 0); |
1280 | new_queue: | 1533 | new_queue: |
1534 | /* | ||
1535 | * Current queue expired. Check if we have to switch to a new | ||
1536 | * service tree | ||
1537 | */ | ||
1538 | if (!new_cfqq) | ||
1539 | choose_service_tree(cfqd); | ||
1540 | |||
1281 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); | 1541 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); |
1282 | keep_queue: | 1542 | keep_queue: |
1283 | return cfqq; | 1543 | return cfqq; |
@@ -1304,8 +1564,14 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) | |||
1304 | { | 1564 | { |
1305 | struct cfq_queue *cfqq; | 1565 | struct cfq_queue *cfqq; |
1306 | int dispatched = 0; | 1566 | int dispatched = 0; |
1307 | 1567 | int i, j; | |
1308 | while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL) | 1568 | for (i = 0; i < 2; ++i) |
1569 | for (j = 0; j < 3; ++j) | ||
1570 | while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j])) | ||
1571 | != NULL) | ||
1572 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | ||
1573 | |||
1574 | while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL) | ||
1309 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 1575 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
1310 | 1576 | ||
1311 | cfq_slice_expired(cfqd, 0); | 1577 | cfq_slice_expired(cfqd, 0); |
@@ -1323,7 +1589,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1323 | /* | 1589 | /* |
1324 | * Drain async requests before we start sync IO | 1590 | * Drain async requests before we start sync IO |
1325 | */ | 1591 | */ |
1326 | if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) | 1592 | if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) |
1327 | return false; | 1593 | return false; |
1328 | 1594 | ||
1329 | /* | 1595 | /* |
@@ -2086,13 +2352,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2086 | enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); | 2352 | enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); |
2087 | 2353 | ||
2088 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | 2354 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || |
2089 | (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq))) | 2355 | (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))) |
2090 | enable_idle = 0; | 2356 | enable_idle = 0; |
2091 | else if (sample_valid(cic->ttime_samples)) { | 2357 | else if (sample_valid(cic->ttime_samples)) { |
2092 | unsigned int slice_idle = cfqd->cfq_slice_idle; | 2358 | if (cic->ttime_mean > cfqd->cfq_slice_idle) |
2093 | if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)) | ||
2094 | slice_idle = msecs_to_jiffies(CFQ_MIN_TT); | ||
2095 | if (cic->ttime_mean > slice_idle) | ||
2096 | enable_idle = 0; | 2359 | enable_idle = 0; |
2097 | else | 2360 | else |
2098 | enable_idle = 1; | 2361 | enable_idle = 1; |
@@ -2130,6 +2393,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
2130 | if (cfq_class_idle(cfqq)) | 2393 | if (cfq_class_idle(cfqq)) |
2131 | return true; | 2394 | return true; |
2132 | 2395 | ||
2396 | if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD | ||
2397 | && new_cfqq->service_tree == cfqq->service_tree) | ||
2398 | return true; | ||
2399 | |||
2133 | /* | 2400 | /* |
2134 | * if the new request is sync, but the currently running queue is | 2401 | * if the new request is sync, but the currently running queue is |
2135 | * not, let the sync request have priority. | 2402 | * not, let the sync request have priority. |
@@ -2243,10 +2510,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
2243 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); | 2510 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); |
2244 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); | 2511 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); |
2245 | 2512 | ||
2246 | cfq_add_rq_rb(rq); | ||
2247 | |||
2248 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | 2513 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
2249 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 2514 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
2515 | cfq_add_rq_rb(rq); | ||
2250 | 2516 | ||
2251 | cfq_rq_enqueued(cfqd, cfqq, rq); | 2517 | cfq_rq_enqueued(cfqd, cfqq, rq); |
2252 | } | 2518 | } |
@@ -2645,13 +2911,16 @@ static void cfq_exit_queue(struct elevator_queue *e) | |||
2645 | static void *cfq_init_queue(struct request_queue *q) | 2911 | static void *cfq_init_queue(struct request_queue *q) |
2646 | { | 2912 | { |
2647 | struct cfq_data *cfqd; | 2913 | struct cfq_data *cfqd; |
2648 | int i; | 2914 | int i, j; |
2649 | 2915 | ||
2650 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); | 2916 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
2651 | if (!cfqd) | 2917 | if (!cfqd) |
2652 | return NULL; | 2918 | return NULL; |
2653 | 2919 | ||
2654 | cfqd->service_tree = CFQ_RB_ROOT; | 2920 | for (i = 0; i < 2; ++i) |
2921 | for (j = 0; j < 3; ++j) | ||
2922 | cfqd->service_trees[i][j] = CFQ_RB_ROOT; | ||
2923 | cfqd->service_tree_idle = CFQ_RB_ROOT; | ||
2655 | 2924 | ||
2656 | /* | 2925 | /* |
2657 | * Not strictly needed (since RB_ROOT just clears the node and we | 2926 | * Not strictly needed (since RB_ROOT just clears the node and we |