diff options
-rw-r--r-- | block/cfq-iosched.c | 223 |
1 files changed, 198 insertions, 25 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 090a4ee75b9d..0d3b70de3d80 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -80,6 +80,14 @@ struct cfq_data { | |||
80 | * rr list of queues with requests and the count of them | 80 | * rr list of queues with requests and the count of them |
81 | */ | 81 | */ |
82 | struct cfq_rb_root service_tree; | 82 | struct cfq_rb_root service_tree; |
83 | |||
84 | /* | ||
85 | * Each priority tree is sorted by next_request position. These | ||
86 | * trees are used when determining if two or more queues are | ||
87 | * interleaving requests (see cfq_close_cooperator). | ||
88 | */ | ||
89 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; | ||
90 | |||
83 | unsigned int busy_queues; | 91 | unsigned int busy_queues; |
84 | /* | 92 | /* |
85 | * Used to track any pending rt requests so we can pre-empt current | 93 | * Used to track any pending rt requests so we can pre-empt current |
@@ -144,6 +152,8 @@ struct cfq_queue { | |||
144 | struct rb_node rb_node; | 152 | struct rb_node rb_node; |
145 | /* service_tree key */ | 153 | /* service_tree key */ |
146 | unsigned long rb_key; | 154 | unsigned long rb_key; |
155 | /* prio tree member */ | ||
156 | struct rb_node p_node; | ||
147 | /* sorted list of pending requests */ | 157 | /* sorted list of pending requests */ |
148 | struct rb_root sort_list; | 158 | struct rb_root sort_list; |
149 | /* if fifo isn't expired, next request to serve */ | 159 | /* if fifo isn't expired, next request to serve */ |
@@ -182,6 +192,7 @@ enum cfqq_state_flags { | |||
182 | CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ | 192 | CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ |
183 | CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ | 193 | CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ |
184 | CFQ_CFQQ_FLAG_sync, /* synchronous queue */ | 194 | CFQ_CFQQ_FLAG_sync, /* synchronous queue */ |
195 | CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */ | ||
185 | }; | 196 | }; |
186 | 197 | ||
187 | #define CFQ_CFQQ_FNS(name) \ | 198 | #define CFQ_CFQQ_FNS(name) \ |
@@ -208,6 +219,7 @@ CFQ_CFQQ_FNS(idle_window); | |||
208 | CFQ_CFQQ_FNS(prio_changed); | 219 | CFQ_CFQQ_FNS(prio_changed); |
209 | CFQ_CFQQ_FNS(slice_new); | 220 | CFQ_CFQQ_FNS(slice_new); |
210 | CFQ_CFQQ_FNS(sync); | 221 | CFQ_CFQQ_FNS(sync); |
222 | CFQ_CFQQ_FNS(coop); | ||
211 | #undef CFQ_CFQQ_FNS | 223 | #undef CFQ_CFQQ_FNS |
212 | 224 | ||
213 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ | 225 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ |
@@ -416,13 +428,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) | |||
416 | return NULL; | 428 | return NULL; |
417 | } | 429 | } |
418 | 430 | ||
/*
 * Unlink @n from the rbtree rooted at @root and then mark the node as
 * cleared, so that a later RB_EMPTY_NODE() test on it reports "not in a
 * tree".
 */
static void rb_erase_init(struct rb_node *n, struct rb_root *root)
{
	/* The node must be unlinked first; clearing only resets the node. */
	rb_erase(n, root);

	RB_CLEAR_NODE(n);
}
436 | |||
419 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) | 437 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) |
420 | { | 438 | { |
421 | if (root->left == n) | 439 | if (root->left == n) |
422 | root->left = NULL; | 440 | root->left = NULL; |
423 | 441 | rb_erase_init(n, &root->rb); | |
424 | rb_erase(n, &root->rb); | ||
425 | RB_CLEAR_NODE(n); | ||
426 | } | 442 | } |
427 | 443 | ||
428 | /* | 444 | /* |
@@ -467,8 +483,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
467 | * requests waiting to be processed. It is sorted in the order that | 483 | * requests waiting to be processed. It is sorted in the order that |
468 | * we will service the queues. | 484 | * we will service the queues. |
469 | */ | 485 | */ |
470 | static void cfq_service_tree_add(struct cfq_data *cfqd, | 486 | static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
471 | struct cfq_queue *cfqq, int add_front) | 487 | int add_front) |
472 | { | 488 | { |
473 | struct rb_node **p, *parent; | 489 | struct rb_node **p, *parent; |
474 | struct cfq_queue *__cfqq; | 490 | struct cfq_queue *__cfqq; |
@@ -541,6 +557,63 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, | |||
541 | rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); | 557 | rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); |
542 | } | 558 | } |
543 | 559 | ||
560 | static struct cfq_queue * | ||
561 | cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector, | ||
562 | struct rb_node **ret_parent, struct rb_node ***rb_link) | ||
563 | { | ||
564 | struct rb_root *root = &cfqd->prio_trees[ioprio]; | ||
565 | struct rb_node **p, *parent; | ||
566 | struct cfq_queue *cfqq = NULL; | ||
567 | |||
568 | parent = NULL; | ||
569 | p = &root->rb_node; | ||
570 | while (*p) { | ||
571 | struct rb_node **n; | ||
572 | |||
573 | parent = *p; | ||
574 | cfqq = rb_entry(parent, struct cfq_queue, p_node); | ||
575 | |||
576 | /* | ||
577 | * Sort strictly based on sector. Smallest to the left, | ||
578 | * largest to the right. | ||
579 | */ | ||
580 | if (sector > cfqq->next_rq->sector) | ||
581 | n = &(*p)->rb_right; | ||
582 | else if (sector < cfqq->next_rq->sector) | ||
583 | n = &(*p)->rb_left; | ||
584 | else | ||
585 | break; | ||
586 | p = n; | ||
587 | } | ||
588 | |||
589 | *ret_parent = parent; | ||
590 | if (rb_link) | ||
591 | *rb_link = p; | ||
592 | return NULL; | ||
593 | } | ||
594 | |||
595 | static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
596 | { | ||
597 | struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio]; | ||
598 | struct rb_node **p, *parent; | ||
599 | struct cfq_queue *__cfqq; | ||
600 | |||
601 | if (!RB_EMPTY_NODE(&cfqq->p_node)) | ||
602 | rb_erase_init(&cfqq->p_node, root); | ||
603 | |||
604 | if (cfq_class_idle(cfqq)) | ||
605 | return; | ||
606 | if (!cfqq->next_rq) | ||
607 | return; | ||
608 | |||
609 | __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector, | ||
610 | &parent, &p); | ||
611 | BUG_ON(__cfqq); | ||
612 | |||
613 | rb_link_node(&cfqq->p_node, parent, p); | ||
614 | rb_insert_color(&cfqq->p_node, root); | ||
615 | } | ||
616 | |||
544 | /* | 617 | /* |
545 | * Update cfqq's position in the service tree. | 618 | * Update cfqq's position in the service tree. |
546 | */ | 619 | */ |
@@ -549,8 +622,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
549 | /* | 622 | /* |
550 | * Resorting requires the cfqq to be on the RR list already. | 623 | * Resorting requires the cfqq to be on the RR list already. |
551 | */ | 624 | */ |
552 | if (cfq_cfqq_on_rr(cfqq)) | 625 | if (cfq_cfqq_on_rr(cfqq)) { |
553 | cfq_service_tree_add(cfqd, cfqq, 0); | 626 | cfq_service_tree_add(cfqd, cfqq, 0); |
627 | cfq_prio_tree_add(cfqd, cfqq); | ||
628 | } | ||
554 | } | 629 | } |
555 | 630 | ||
556 | /* | 631 | /* |
@@ -581,6 +656,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
581 | 656 | ||
582 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) | 657 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) |
583 | cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); | 658 | cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); |
659 | if (!RB_EMPTY_NODE(&cfqq->p_node)) | ||
660 | rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]); | ||
584 | 661 | ||
585 | BUG_ON(!cfqd->busy_queues); | 662 | BUG_ON(!cfqd->busy_queues); |
586 | cfqd->busy_queues--; | 663 | cfqd->busy_queues--; |
@@ -610,7 +687,7 @@ static void cfq_add_rq_rb(struct request *rq) | |||
610 | { | 687 | { |
611 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 688 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
612 | struct cfq_data *cfqd = cfqq->cfqd; | 689 | struct cfq_data *cfqd = cfqq->cfqd; |
613 | struct request *__alias; | 690 | struct request *__alias, *prev; |
614 | 691 | ||
615 | cfqq->queued[rq_is_sync(rq)]++; | 692 | cfqq->queued[rq_is_sync(rq)]++; |
616 | 693 | ||
@@ -627,7 +704,15 @@ static void cfq_add_rq_rb(struct request *rq) | |||
627 | /* | 704 | /* |
628 | * check if this request is a better next-serve candidate | 705 | * check if this request is a better next-serve candidate |
629 | */ | 706 | */ |
707 | prev = cfqq->next_rq; | ||
630 | cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); | 708 | cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); |
709 | |||
710 | /* | ||
711 | * adjust priority tree position, if ->next_rq changes | ||
712 | */ | ||
713 | if (prev != cfqq->next_rq) | ||
714 | cfq_prio_tree_add(cfqd, cfqq); | ||
715 | |||
631 | BUG_ON(!cfqq->next_rq); | 716 | BUG_ON(!cfqq->next_rq); |
632 | } | 717 | } |
633 | 718 | ||
@@ -840,11 +925,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | |||
840 | /* | 925 | /* |
841 | * Get and set a new active queue for service. | 926 | * Get and set a new active queue for service. |
842 | */ | 927 | */ |
843 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) | 928 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, |
929 | struct cfq_queue *cfqq) | ||
844 | { | 930 | { |
845 | struct cfq_queue *cfqq; | 931 | if (!cfqq) { |
932 | cfqq = cfq_get_next_queue(cfqd); | ||
933 | if (cfqq) | ||
934 | cfq_clear_cfqq_coop(cfqq); | ||
935 | } | ||
846 | 936 | ||
847 | cfqq = cfq_get_next_queue(cfqd); | ||
848 | __cfq_set_active_queue(cfqd, cfqq); | 937 | __cfq_set_active_queue(cfqd, cfqq); |
849 | return cfqq; | 938 | return cfqq; |
850 | } | 939 | } |
@@ -868,17 +957,89 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) | |||
868 | return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; | 957 | return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; |
869 | } | 958 | } |
870 | 959 | ||
871 | static int cfq_close_cooperator(struct cfq_data *cfq_data, | 960 | static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, |
872 | struct cfq_queue *cfqq) | 961 | struct cfq_queue *cur_cfqq) |
962 | { | ||
963 | struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio]; | ||
964 | struct rb_node *parent, *node; | ||
965 | struct cfq_queue *__cfqq; | ||
966 | sector_t sector = cfqd->last_position; | ||
967 | |||
968 | if (RB_EMPTY_ROOT(root)) | ||
969 | return NULL; | ||
970 | |||
971 | /* | ||
972 | * First, if we find a request starting at the end of the last | ||
973 | * request, choose it. | ||
974 | */ | ||
975 | __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio, | ||
976 | sector, &parent, NULL); | ||
977 | if (__cfqq) | ||
978 | return __cfqq; | ||
979 | |||
980 | /* | ||
981 | * If the exact sector wasn't found, the parent of the NULL leaf | ||
982 | * will contain the closest sector. | ||
983 | */ | ||
984 | __cfqq = rb_entry(parent, struct cfq_queue, p_node); | ||
985 | if (cfq_rq_close(cfqd, __cfqq->next_rq)) | ||
986 | return __cfqq; | ||
987 | |||
988 | if (__cfqq->next_rq->sector < sector) | ||
989 | node = rb_next(&__cfqq->p_node); | ||
990 | else | ||
991 | node = rb_prev(&__cfqq->p_node); | ||
992 | if (!node) | ||
993 | return NULL; | ||
994 | |||
995 | __cfqq = rb_entry(node, struct cfq_queue, p_node); | ||
996 | if (cfq_rq_close(cfqd, __cfqq->next_rq)) | ||
997 | return __cfqq; | ||
998 | |||
999 | return NULL; | ||
1000 | } | ||
1001 | |||
1002 | /* | ||
1003 | * cfqd - obvious | ||
1004 | * cur_cfqq - passed in so that we don't decide that the current queue is | ||
1005 | * closely cooperating with itself. | ||
1006 | * | ||
1007 | * So, basically we're assuming that that cur_cfqq has dispatched at least | ||
1008 | * one request, and that cfqd->last_position reflects a position on the disk | ||
1009 | * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid | ||
1010 | * assumption. | ||
1011 | */ | ||
1012 | static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, | ||
1013 | struct cfq_queue *cur_cfqq, | ||
1014 | int probe) | ||
873 | { | 1015 | { |
1016 | struct cfq_queue *cfqq; | ||
1017 | |||
1018 | /* | ||
1019 | * A valid cfq_io_context is necessary to compare requests against | ||
1020 | * the seek_mean of the current cfqq. | ||
1021 | */ | ||
1022 | if (!cfqd->active_cic) | ||
1023 | return NULL; | ||
1024 | |||
874 | /* | 1025 | /* |
875 | * We should notice if some of the queues are cooperating, eg | 1026 | * We should notice if some of the queues are cooperating, eg |
876 | * working closely on the same area of the disk. In that case, | 1027 | * working closely on the same area of the disk. In that case, |
877 | * we can group them together and don't waste time idling. | 1028 | * we can group them together and don't waste time idling. |
878 | */ | 1029 | */ |
879 | return 0; | 1030 | cfqq = cfqq_close(cfqd, cur_cfqq); |
1031 | if (!cfqq) | ||
1032 | return NULL; | ||
1033 | |||
1034 | if (cfq_cfqq_coop(cfqq)) | ||
1035 | return NULL; | ||
1036 | |||
1037 | if (!probe) | ||
1038 | cfq_mark_cfqq_coop(cfqq); | ||
1039 | return cfqq; | ||
880 | } | 1040 | } |
881 | 1041 | ||
1042 | |||
882 | #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) | 1043 | #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) |
883 | 1044 | ||
884 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) | 1045 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
@@ -917,13 +1078,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
917 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) | 1078 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) |
918 | return; | 1079 | return; |
919 | 1080 | ||
920 | /* | ||
921 | * See if this prio level has a good candidate | ||
922 | */ | ||
923 | if (cfq_close_cooperator(cfqd, cfqq) && | ||
924 | (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) | ||
925 | return; | ||
926 | |||
927 | cfq_mark_cfqq_wait_request(cfqq); | 1081 | cfq_mark_cfqq_wait_request(cfqq); |
928 | 1082 | ||
929 | /* | 1083 | /* |
@@ -1000,7 +1154,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1000 | */ | 1154 | */ |
1001 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | 1155 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) |
1002 | { | 1156 | { |
1003 | struct cfq_queue *cfqq; | 1157 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
1004 | 1158 | ||
1005 | cfqq = cfqd->active_queue; | 1159 | cfqq = cfqd->active_queue; |
1006 | if (!cfqq) | 1160 | if (!cfqq) |
@@ -1034,6 +1188,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
1034 | goto keep_queue; | 1188 | goto keep_queue; |
1035 | 1189 | ||
1036 | /* | 1190 | /* |
1191 | * If another queue has a request waiting within our mean seek | ||
1192 | * distance, let it run. The expire code will check for close | ||
1193 | * cooperators and put the close queue at the front of the service | ||
1194 | * tree. | ||
1195 | */ | ||
1196 | new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0); | ||
1197 | if (new_cfqq) | ||
1198 | goto expire; | ||
1199 | |||
1200 | /* | ||
1037 | * No requests pending. If the active queue still has requests in | 1201 | * No requests pending. If the active queue still has requests in |
1038 | * flight or is idling for a new request, allow either of these | 1202 | * flight or is idling for a new request, allow either of these |
1039 | * conditions to happen (or time out) before selecting a new queue. | 1203 | * conditions to happen (or time out) before selecting a new queue. |
@@ -1047,7 +1211,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
1047 | expire: | 1211 | expire: |
1048 | cfq_slice_expired(cfqd, 0); | 1212 | cfq_slice_expired(cfqd, 0); |
1049 | new_queue: | 1213 | new_queue: |
1050 | cfqq = cfq_set_active_queue(cfqd); | 1214 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); |
1051 | keep_queue: | 1215 | keep_queue: |
1052 | return cfqq; | 1216 | return cfqq; |
1053 | } | 1217 | } |
@@ -1508,6 +1672,7 @@ retry: | |||
1508 | } | 1672 | } |
1509 | 1673 | ||
1510 | RB_CLEAR_NODE(&cfqq->rb_node); | 1674 | RB_CLEAR_NODE(&cfqq->rb_node); |
1675 | RB_CLEAR_NODE(&cfqq->p_node); | ||
1511 | INIT_LIST_HEAD(&cfqq->fifo); | 1676 | INIT_LIST_HEAD(&cfqq->fifo); |
1512 | 1677 | ||
1513 | atomic_set(&cfqq->ref, 0); | 1678 | atomic_set(&cfqq->ref, 0); |
@@ -2000,16 +2165,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
2000 | * or if we want to idle in case it has no pending requests. | 2165 | * or if we want to idle in case it has no pending requests. |
2001 | */ | 2166 | */ |
2002 | if (cfqd->active_queue == cfqq) { | 2167 | if (cfqd->active_queue == cfqq) { |
2168 | const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); | ||
2169 | |||
2003 | if (cfq_cfqq_slice_new(cfqq)) { | 2170 | if (cfq_cfqq_slice_new(cfqq)) { |
2004 | cfq_set_prio_slice(cfqd, cfqq); | 2171 | cfq_set_prio_slice(cfqd, cfqq); |
2005 | cfq_clear_cfqq_slice_new(cfqq); | 2172 | cfq_clear_cfqq_slice_new(cfqq); |
2006 | } | 2173 | } |
2174 | /* | ||
2175 | * If there are no requests waiting in this queue, and | ||
2176 | * there are other queues ready to issue requests, AND | ||
2177 | * those other queues are issuing requests within our | ||
2178 | * mean seek distance, give them a chance to run instead | ||
2179 | * of idling. | ||
2180 | */ | ||
2007 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) | 2181 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) |
2008 | cfq_slice_expired(cfqd, 1); | 2182 | cfq_slice_expired(cfqd, 1); |
2009 | else if (sync && !rq_noidle(rq) && | 2183 | else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) && |
2010 | RB_EMPTY_ROOT(&cfqq->sort_list)) { | 2184 | sync && !rq_noidle(rq)) |
2011 | cfq_arm_slice_timer(cfqd); | 2185 | cfq_arm_slice_timer(cfqd); |
2012 | } | ||
2013 | } | 2186 | } |
2014 | 2187 | ||
2015 | if (!cfqd->rq_in_driver) | 2188 | if (!cfqd->rq_in_driver) |