aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <jens.axboe@oracle.com>2009-04-15 06:15:11 -0400
committerJens Axboe <jens.axboe@oracle.com>2009-04-15 06:15:11 -0400
commita36e71f996e25d6213f57951f7ae1874086ec57e (patch)
tree1673eeb55b4d84a3d38dda9009ad7ac6f31c5a89 /block
parent9481ffdc61738a91baf0f8b7fb20922768ae1b8e (diff)
cfq-iosched: add close cooperator code
If we have processes that are working in close proximity to each other on disk, we don't want to idle wait. Instead allow the close process to issue a request, getting better aggregate bandwidth. The anticipatory scheduler has similar checks, noop and deadline do not need it since they don't care about process <-> io mappings. The code for CFQ is a little more involved though, since we split request queues into per-process contexts. This fixes a performance problem with eg dump(8), since it uses several processes in some silly attempt to speed IO up. Even if dump(8) isn't really a valid case (it should be fixed by using CLONE_IO), there are other cases where we see close processes and where idling ends up hurting performance. Credit goes to Jeff Moyer <jmoyer@redhat.com> for writing the initial implementation. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r--block/cfq-iosched.c223
1 files changed, 198 insertions, 25 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 090a4ee75b9..0d3b70de3d8 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -80,6 +80,14 @@ struct cfq_data {
80 * rr list of queues with requests and the count of them 80 * rr list of queues with requests and the count of them
81 */ 81 */
82 struct cfq_rb_root service_tree; 82 struct cfq_rb_root service_tree;
83
84 /*
85 * Each priority tree is sorted by next_request position. These
86 * trees are used when determining if two or more queues are
87 * interleaving requests (see cfq_close_cooperator).
88 */
89 struct rb_root prio_trees[CFQ_PRIO_LISTS];
90
83 unsigned int busy_queues; 91 unsigned int busy_queues;
84 /* 92 /*
85 * Used to track any pending rt requests so we can pre-empt current 93 * Used to track any pending rt requests so we can pre-empt current
@@ -144,6 +152,8 @@ struct cfq_queue {
144 struct rb_node rb_node; 152 struct rb_node rb_node;
145 /* service_tree key */ 153 /* service_tree key */
146 unsigned long rb_key; 154 unsigned long rb_key;
155 /* prio tree member */
156 struct rb_node p_node;
147 /* sorted list of pending requests */ 157 /* sorted list of pending requests */
148 struct rb_root sort_list; 158 struct rb_root sort_list;
149 /* if fifo isn't expired, next request to serve */ 159 /* if fifo isn't expired, next request to serve */
@@ -182,6 +192,7 @@ enum cfqq_state_flags {
182 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 192 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
183 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 193 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
184 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 194 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
195 CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */
185}; 196};
186 197
187#define CFQ_CFQQ_FNS(name) \ 198#define CFQ_CFQQ_FNS(name) \
@@ -208,6 +219,7 @@ CFQ_CFQQ_FNS(idle_window);
208CFQ_CFQQ_FNS(prio_changed); 219CFQ_CFQQ_FNS(prio_changed);
209CFQ_CFQQ_FNS(slice_new); 220CFQ_CFQQ_FNS(slice_new);
210CFQ_CFQQ_FNS(sync); 221CFQ_CFQQ_FNS(sync);
222CFQ_CFQQ_FNS(coop);
211#undef CFQ_CFQQ_FNS 223#undef CFQ_CFQQ_FNS
212 224
213#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 225#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
@@ -416,13 +428,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
416 return NULL; 428 return NULL;
417} 429}
418 430
431static void rb_erase_init(struct rb_node *n, struct rb_root *root)
432{
433 rb_erase(n, root);
434 RB_CLEAR_NODE(n);
435}
436
419static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) 437static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
420{ 438{
421 if (root->left == n) 439 if (root->left == n)
422 root->left = NULL; 440 root->left = NULL;
423 441 rb_erase_init(n, &root->rb);
424 rb_erase(n, &root->rb);
425 RB_CLEAR_NODE(n);
426} 442}
427 443
428/* 444/*
@@ -467,8 +483,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
467 * requests waiting to be processed. It is sorted in the order that 483 * requests waiting to be processed. It is sorted in the order that
468 * we will service the queues. 484 * we will service the queues.
469 */ 485 */
470static void cfq_service_tree_add(struct cfq_data *cfqd, 486static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
471 struct cfq_queue *cfqq, int add_front) 487 int add_front)
472{ 488{
473 struct rb_node **p, *parent; 489 struct rb_node **p, *parent;
474 struct cfq_queue *__cfqq; 490 struct cfq_queue *__cfqq;
@@ -541,6 +557,63 @@ static void cfq_service_tree_add(struct cfq_data *cfqd,
541 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); 557 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
542} 558}
543 559
560static struct cfq_queue *
561cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector,
562 struct rb_node **ret_parent, struct rb_node ***rb_link)
563{
564 struct rb_root *root = &cfqd->prio_trees[ioprio];
565 struct rb_node **p, *parent;
566 struct cfq_queue *cfqq = NULL;
567
568 parent = NULL;
569 p = &root->rb_node;
570 while (*p) {
571 struct rb_node **n;
572
573 parent = *p;
574 cfqq = rb_entry(parent, struct cfq_queue, p_node);
575
576 /*
577 * Sort strictly based on sector. Smallest to the left,
578 * largest to the right.
579 */
580 if (sector > cfqq->next_rq->sector)
581 n = &(*p)->rb_right;
582 else if (sector < cfqq->next_rq->sector)
583 n = &(*p)->rb_left;
584 else
585 break;
586 p = n;
587 }
588
589 *ret_parent = parent;
590 if (rb_link)
591 *rb_link = p;
592 return NULL;
593}
594
595static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
596{
597 struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio];
598 struct rb_node **p, *parent;
599 struct cfq_queue *__cfqq;
600
601 if (!RB_EMPTY_NODE(&cfqq->p_node))
602 rb_erase_init(&cfqq->p_node, root);
603
604 if (cfq_class_idle(cfqq))
605 return;
606 if (!cfqq->next_rq)
607 return;
608
609 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector,
610 &parent, &p);
611 BUG_ON(__cfqq);
612
613 rb_link_node(&cfqq->p_node, parent, p);
614 rb_insert_color(&cfqq->p_node, root);
615}
616
544/* 617/*
545 * Update cfqq's position in the service tree. 618 * Update cfqq's position in the service tree.
546 */ 619 */
@@ -549,8 +622,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
549 /* 622 /*
550 * Resorting requires the cfqq to be on the RR list already. 623 * Resorting requires the cfqq to be on the RR list already.
551 */ 624 */
552 if (cfq_cfqq_on_rr(cfqq)) 625 if (cfq_cfqq_on_rr(cfqq)) {
553 cfq_service_tree_add(cfqd, cfqq, 0); 626 cfq_service_tree_add(cfqd, cfqq, 0);
627 cfq_prio_tree_add(cfqd, cfqq);
628 }
554} 629}
555 630
556/* 631/*
@@ -581,6 +656,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
581 656
582 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 657 if (!RB_EMPTY_NODE(&cfqq->rb_node))
583 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); 658 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
659 if (!RB_EMPTY_NODE(&cfqq->p_node))
660 rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]);
584 661
585 BUG_ON(!cfqd->busy_queues); 662 BUG_ON(!cfqd->busy_queues);
586 cfqd->busy_queues--; 663 cfqd->busy_queues--;
@@ -610,7 +687,7 @@ static void cfq_add_rq_rb(struct request *rq)
610{ 687{
611 struct cfq_queue *cfqq = RQ_CFQQ(rq); 688 struct cfq_queue *cfqq = RQ_CFQQ(rq);
612 struct cfq_data *cfqd = cfqq->cfqd; 689 struct cfq_data *cfqd = cfqq->cfqd;
613 struct request *__alias; 690 struct request *__alias, *prev;
614 691
615 cfqq->queued[rq_is_sync(rq)]++; 692 cfqq->queued[rq_is_sync(rq)]++;
616 693
@@ -627,7 +704,15 @@ static void cfq_add_rq_rb(struct request *rq)
627 /* 704 /*
628 * check if this request is a better next-serve candidate 705 * check if this request is a better next-serve candidate
629 */ 706 */
707 prev = cfqq->next_rq;
630 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); 708 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
709
710 /*
711 * adjust priority tree position, if ->next_rq changes
712 */
713 if (prev != cfqq->next_rq)
714 cfq_prio_tree_add(cfqd, cfqq);
715
631 BUG_ON(!cfqq->next_rq); 716 BUG_ON(!cfqq->next_rq);
632} 717}
633 718
@@ -840,11 +925,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
840/* 925/*
841 * Get and set a new active queue for service. 926 * Get and set a new active queue for service.
842 */ 927 */
843static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 928static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
929 struct cfq_queue *cfqq)
844{ 930{
845 struct cfq_queue *cfqq; 931 if (!cfqq) {
932 cfqq = cfq_get_next_queue(cfqd);
933 if (cfqq)
934 cfq_clear_cfqq_coop(cfqq);
935 }
846 936
847 cfqq = cfq_get_next_queue(cfqd);
848 __cfq_set_active_queue(cfqd, cfqq); 937 __cfq_set_active_queue(cfqd, cfqq);
849 return cfqq; 938 return cfqq;
850} 939}
@@ -868,17 +957,89 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
868 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; 957 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
869} 958}
870 959
871static int cfq_close_cooperator(struct cfq_data *cfq_data, 960static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
872 struct cfq_queue *cfqq) 961 struct cfq_queue *cur_cfqq)
962{
963 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio];
964 struct rb_node *parent, *node;
965 struct cfq_queue *__cfqq;
966 sector_t sector = cfqd->last_position;
967
968 if (RB_EMPTY_ROOT(root))
969 return NULL;
970
971 /*
972 * First, if we find a request starting at the end of the last
973 * request, choose it.
974 */
975 __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio,
976 sector, &parent, NULL);
977 if (__cfqq)
978 return __cfqq;
979
980 /*
981 * If the exact sector wasn't found, the parent of the NULL leaf
982 * will contain the closest sector.
983 */
984 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
985 if (cfq_rq_close(cfqd, __cfqq->next_rq))
986 return __cfqq;
987
988 if (__cfqq->next_rq->sector < sector)
989 node = rb_next(&__cfqq->p_node);
990 else
991 node = rb_prev(&__cfqq->p_node);
992 if (!node)
993 return NULL;
994
995 __cfqq = rb_entry(node, struct cfq_queue, p_node);
996 if (cfq_rq_close(cfqd, __cfqq->next_rq))
997 return __cfqq;
998
999 return NULL;
1000}
1001
1002/*
1003 * cfqd - obvious
1004 * cur_cfqq - passed in so that we don't decide that the current queue is
1005 * closely cooperating with itself.
1006 *
1007 * So, basically we're assuming that that cur_cfqq has dispatched at least
1008 * one request, and that cfqd->last_position reflects a position on the disk
1009 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
1010 * assumption.
1011 */
1012static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
1013 struct cfq_queue *cur_cfqq,
1014 int probe)
873{ 1015{
1016 struct cfq_queue *cfqq;
1017
1018 /*
1019 * A valid cfq_io_context is necessary to compare requests against
1020 * the seek_mean of the current cfqq.
1021 */
1022 if (!cfqd->active_cic)
1023 return NULL;
1024
874 /* 1025 /*
875 * We should notice if some of the queues are cooperating, eg 1026 * We should notice if some of the queues are cooperating, eg
876 * working closely on the same area of the disk. In that case, 1027 * working closely on the same area of the disk. In that case,
877 * we can group them together and don't waste time idling. 1028 * we can group them together and don't waste time idling.
878 */ 1029 */
879 return 0; 1030 cfqq = cfqq_close(cfqd, cur_cfqq);
1031 if (!cfqq)
1032 return NULL;
1033
1034 if (cfq_cfqq_coop(cfqq))
1035 return NULL;
1036
1037 if (!probe)
1038 cfq_mark_cfqq_coop(cfqq);
1039 return cfqq;
880} 1040}
881 1041
1042
882#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) 1043#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
883 1044
884static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1045static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -917,13 +1078,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
917 if (!cic || !atomic_read(&cic->ioc->nr_tasks)) 1078 if (!cic || !atomic_read(&cic->ioc->nr_tasks))
918 return; 1079 return;
919 1080
920 /*
921 * See if this prio level has a good candidate
922 */
923 if (cfq_close_cooperator(cfqd, cfqq) &&
924 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
925 return;
926
927 cfq_mark_cfqq_wait_request(cfqq); 1081 cfq_mark_cfqq_wait_request(cfqq);
928 1082
929 /* 1083 /*
@@ -1000,7 +1154,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1000 */ 1154 */
1001static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 1155static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1002{ 1156{
1003 struct cfq_queue *cfqq; 1157 struct cfq_queue *cfqq, *new_cfqq = NULL;
1004 1158
1005 cfqq = cfqd->active_queue; 1159 cfqq = cfqd->active_queue;
1006 if (!cfqq) 1160 if (!cfqq)
@@ -1034,6 +1188,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1034 goto keep_queue; 1188 goto keep_queue;
1035 1189
1036 /* 1190 /*
1191 * If another queue has a request waiting within our mean seek
1192 * distance, let it run. The expire code will check for close
1193 * cooperators and put the close queue at the front of the service
1194 * tree.
1195 */
1196 new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0);
1197 if (new_cfqq)
1198 goto expire;
1199
1200 /*
1037 * No requests pending. If the active queue still has requests in 1201 * No requests pending. If the active queue still has requests in
1038 * flight or is idling for a new request, allow either of these 1202 * flight or is idling for a new request, allow either of these
1039 * conditions to happen (or time out) before selecting a new queue. 1203 * conditions to happen (or time out) before selecting a new queue.
@@ -1047,7 +1211,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1047expire: 1211expire:
1048 cfq_slice_expired(cfqd, 0); 1212 cfq_slice_expired(cfqd, 0);
1049new_queue: 1213new_queue:
1050 cfqq = cfq_set_active_queue(cfqd); 1214 cfqq = cfq_set_active_queue(cfqd, new_cfqq);
1051keep_queue: 1215keep_queue:
1052 return cfqq; 1216 return cfqq;
1053} 1217}
@@ -1508,6 +1672,7 @@ retry:
1508 } 1672 }
1509 1673
1510 RB_CLEAR_NODE(&cfqq->rb_node); 1674 RB_CLEAR_NODE(&cfqq->rb_node);
1675 RB_CLEAR_NODE(&cfqq->p_node);
1511 INIT_LIST_HEAD(&cfqq->fifo); 1676 INIT_LIST_HEAD(&cfqq->fifo);
1512 1677
1513 atomic_set(&cfqq->ref, 0); 1678 atomic_set(&cfqq->ref, 0);
@@ -2000,16 +2165,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
2000 * or if we want to idle in case it has no pending requests. 2165 * or if we want to idle in case it has no pending requests.
2001 */ 2166 */
2002 if (cfqd->active_queue == cfqq) { 2167 if (cfqd->active_queue == cfqq) {
2168 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
2169
2003 if (cfq_cfqq_slice_new(cfqq)) { 2170 if (cfq_cfqq_slice_new(cfqq)) {
2004 cfq_set_prio_slice(cfqd, cfqq); 2171 cfq_set_prio_slice(cfqd, cfqq);
2005 cfq_clear_cfqq_slice_new(cfqq); 2172 cfq_clear_cfqq_slice_new(cfqq);
2006 } 2173 }
2174 /*
2175 * If there are no requests waiting in this queue, and
2176 * there are other queues ready to issue requests, AND
2177 * those other queues are issuing requests within our
2178 * mean seek distance, give them a chance to run instead
2179 * of idling.
2180 */
2007 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 2181 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
2008 cfq_slice_expired(cfqd, 1); 2182 cfq_slice_expired(cfqd, 1);
2009 else if (sync && !rq_noidle(rq) && 2183 else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) &&
2010 RB_EMPTY_ROOT(&cfqq->sort_list)) { 2184 sync && !rq_noidle(rq))
2011 cfq_arm_slice_timer(cfqd); 2185 cfq_arm_slice_timer(cfqd);
2012 }
2013 } 2186 }
2014 2187
2015 if (!cfqd->rq_in_driver) 2188 if (!cfqd->rq_in_driver)