author		Jens Axboe <jens.axboe@oracle.com>	2007-04-20 08:27:50 -0400
committer	Jens Axboe <axboe@nelson.home.kernel.dk>	2007-04-30 03:01:21 -0400
commit		d9e7620e60bc6648c3dcabbc8d1a320b69c846f9 (patch)
tree		450d0f92533184d85ac00ab1625460fe0be4cda7
parent		1afba0451c83cbff622a08f2d86fbb2e680dfd5f (diff)
cfq-iosched: rework the whole round-robin list concept
Drawing on some inspiration from the CFS CPU scheduler design, overhaul
the pending cfq_queue concept list management. Currently CFQ uses a
doubly linked list per priority level for sorting and service uses.
Kill those lists and maintain an rbtree of cfq_queue's, sorted by when
to service them.

This unfortunately means that the ionice levels aren't as strong
anymore; will work on improving those later. We only scale the slice
time now, not the number of times we service. This means that latency
is better (for all priority levels), but that the distinction between
the highest and lower levels isn't as big.

The diffstat speaks for itself.

 cfq-iosched.c | 363 +++++++++++++++++---------------------------------
 1 file changed, 125 insertions(+), 238 deletions(-)

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--	block/cfq-iosched.c	361
1 files changed, 123 insertions, 238 deletions
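For context only, a minimal standalone sketch (not part of the patch) of how the new scheme scales just the slice length by ioprio, mirroring the cfq_prio_slice() formula the patch introduces below. The 100ms base is an assumption (HZ=1000 with the default cfq_slice_sync = HZ / 10):

#include <stdio.h>

#define CFQ_SLICE_SCALE	(5)

/* same scaling as the cfq_prio_slice() helper added by this patch */
static int prio_slice_ms(int base_slice_ms, int prio)
{
	return base_slice_ms + (base_slice_ms / CFQ_SLICE_SCALE * (4 - prio));
}

int main(void)
{
	int prio;

	/* ioprio 0 (highest) gets ~180ms, ioprio 7 (lowest) ~40ms */
	for (prio = 0; prio < 8; prio++)
		printf("ioprio %d -> slice %d ms\n", prio, prio_slice_ms(100, prio));

	return 0;
}

With every busy queue keyed into the service tree by cfq_slice_offset() + jiffies, the highest and lowest levels now differ by roughly 4-5x in slice length rather than in how often they are serviced.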
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9d6f04103f01..4838c2b16f2c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,7 +26,16 @@ static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
 static int cfq_slice_idle = HZ / 125;
 
+/*
+ * grace period before allowing idle class to get disk access
+ */
 #define CFQ_IDLE_GRACE		(HZ / 10)
+
+/*
+ * below this threshold, we consider thinktime immediate
+ */
+#define CFQ_MIN_TT		(2)
+
 #define CFQ_SLICE_SCALE		(5)
 
 #define CFQ_KEY_ASYNC		(0)
@@ -69,10 +78,9 @@ struct cfq_data {
 	/*
 	 * rr list of queues with requests and the count of them
 	 */
-	struct list_head rr_list[CFQ_PRIO_LISTS];
+	struct rb_root service_tree;
 	struct list_head cur_rr;
 	struct list_head idle_rr;
-	unsigned long cur_rr_tick;
 	unsigned int busy_queues;
 
 	/*
@@ -91,8 +99,6 @@ struct cfq_data {
 
 	struct cfq_queue *active_queue;
 	struct cfq_io_context *active_cic;
-	int cur_prio, cur_end_prio;
-	unsigned long prio_time;
 	unsigned int dispatch_slice;
 
 	struct timer_list idle_class_timer;
@@ -131,8 +137,10 @@ struct cfq_queue {
 	unsigned int key;
 	/* member of the rr/busy/cur/idle cfqd list */
 	struct list_head cfq_list;
-	/* in what tick we were last serviced */
-	unsigned long rr_tick;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
 	/* sorted list of pending requests */
 	struct rb_root sort_list;
 	/* if fifo isn't expired, next request to serve */
@@ -147,8 +155,6 @@ struct cfq_queue {
 	struct list_head fifo;
 
 	unsigned long slice_end;
-	unsigned long service_last;
-	unsigned long slice_start;
 	long slice_resid;
 
 	/* number of requests that are on the dispatch list or inside driver */
@@ -240,30 +246,26 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw, int is_sync)
  * if a queue is marked sync and has sync io queued. A sync queue with async
  * io only, should not get full sync slice length.
  */
-static inline int
-cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
+				 unsigned short prio)
 {
-	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
+	const int base_slice = cfqd->cfq_slice[sync];
 
-	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
+	WARN_ON(prio >= IOPRIO_BE_NR);
+
+	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
+}
 
-	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
+static inline int
+cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
-	cfqq->slice_end += cfqq->slice_resid;
-
-	/*
-	 * Don't carry over residual for more than one slice, we only want
-	 * to slightly correct the fairness. Carrying over forever would
-	 * easily introduce oscillations.
-	 */
-	cfqq->slice_resid = 0;
-
-	cfqq->slice_start = jiffies;
 }
 
 /*
@@ -403,33 +405,50 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return cfq_choose_req(cfqd, next, prev);
 }
 
-/*
- * This function finds out where to insert a BE queue in the service hierarchy
- */
-static void cfq_resort_be_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-				int preempted)
+static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
+				      struct cfq_queue *cfqq)
 {
-	if (!cfq_cfqq_sync(cfqq))
-		list_add_tail(&cfqq->cfq_list, &cfqd->rr_list[cfqq->ioprio]);
-	else {
-		struct list_head *n = &cfqd->rr_list[cfqq->ioprio];
+	/*
+	 * just an approximation, should be ok.
+	 */
+	return ((cfqd->busy_queues - 1) * cfq_prio_slice(cfqd, 1, 0));
+}
+
+static void cfq_service_tree_add(struct cfq_data *cfqd,
+				    struct cfq_queue *cfqq)
+{
+	struct rb_node **p = &cfqd->service_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfq_queue *__cfqq;
+	unsigned long rb_key;
+
+	rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
+	rb_key += cfqq->slice_resid;
+	cfqq->slice_resid = 0;
 
+	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
 		/*
-		 * sort by last service, but don't cross a new or async
-		 * queue. we don't cross a new queue because it hasn't
-		 * been service before, and we don't cross an async
-		 * queue because it gets added to the end on expire.
+		 * same position, nothing more to do
 		 */
-		while ((n = n->prev) != &cfqd->rr_list[cfqq->ioprio]) {
-			struct cfq_queue *__c = list_entry_cfqq(n);
+		if (rb_key == cfqq->rb_key)
+			return;
 
-			if (!cfq_cfqq_sync(__c) || !__c->service_last)
-				break;
-			if (time_before(__c->service_last, cfqq->service_last))
-				break;
-		}
-		list_add(&cfqq->cfq_list, n);
+		rb_erase(&cfqq->rb_node, &cfqd->service_tree);
 	}
+
+	while (*p) {
+		parent = *p;
+		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
+
+		if (rb_key < __cfqq->rb_key)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	cfqq->rb_key = rb_key;
+	rb_link_node(&cfqq->rb_node, parent, p);
+	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree);
 }
 
 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
@@ -443,7 +462,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 	if (!cfq_cfqq_on_rr(cfqq))
 		return;
 
-	list_del(&cfqq->cfq_list);
+	list_del_init(&cfqq->cfq_list);
 
 	if (cfq_class_rt(cfqq)) {
 		/*
@@ -465,7 +484,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 		/*
 		 * So we get here, ergo the queue is a regular best-effort queue
 		 */
-		cfq_resort_be_queue(cfqd, cfqq, preempted);
+		cfq_service_tree_add(cfqd, cfqq);
 	}
 }
 
@@ -490,6 +509,11 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_clear_cfqq_on_rr(cfqq);
 	list_del_init(&cfqq->cfq_list);
 
+	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
+		rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+		RB_CLEAR_NODE(&cfqq->rb_node);
+	}
+
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
 }
@@ -684,7 +708,6 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfq_clear_cfqq_fifo_expire(cfqq);
 		cfq_mark_cfqq_slice_new(cfqq);
 		cfq_clear_cfqq_queue_new(cfqq);
-		cfqq->rr_tick = cfqd->cur_rr_tick;
 	}
 
 	cfqd->active_queue = cfqq;
@@ -732,114 +755,19 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted,
 	__cfq_slice_expired(cfqd, cfqq, preempted, timed_out);
 }
 
-/*
- * 0
- * 0,1
- * 0,1,2
- * 0,1,2,3
- * 0,1,2,3,4
- * 0,1,2,3,4,5
- * 0,1,2,3,4,5,6
- * 0,1,2,3,4,5,6,7
- */
-static int cfq_get_next_prio_level(struct cfq_data *cfqd)
-{
-	int prio, wrap;
-
-	prio = -1;
-	wrap = 0;
-	do {
-		int p;
-
-		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
-			if (!list_empty(&cfqd->rr_list[p])) {
-				prio = p;
-				break;
-			}
-		}
-
-		if (prio != -1)
-			break;
-		cfqd->cur_prio = 0;
-		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
-			cfqd->cur_end_prio = 0;
-			if (wrap)
-				break;
-			wrap = 1;
-		}
-	} while (1);
-
-	if (unlikely(prio == -1))
-		return -1;
-
-	BUG_ON(prio >= CFQ_PRIO_LISTS);
-
-	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
-
-	cfqd->cur_prio = prio + 1;
-	if (cfqd->cur_prio > cfqd->cur_end_prio) {
-		cfqd->cur_end_prio = cfqd->cur_prio;
-		cfqd->cur_prio = 0;
-	}
-	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
-		cfqd->cur_prio = 0;
-		cfqd->cur_end_prio = 0;
-	}
-
-	cfqd->cur_rr_tick++;
-	cfqd->prio_time = jiffies;
-	return prio;
-}
-
-static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
-					  struct request *rq)
-{
-	if (rq->sector >= cfqd->last_position)
-		return rq->sector - cfqd->last_position;
-	else
-		return cfqd->last_position - rq->sector;
-}
-
-static struct cfq_queue *cfq_get_best_queue(struct cfq_data *cfqd)
-{
-	struct cfq_queue *cfqq = NULL, *__cfqq;
-	sector_t best = -1, first = -1, dist;
-
-	list_for_each_entry(__cfqq, &cfqd->cur_rr, cfq_list) {
-		if (!__cfqq->next_rq || !cfq_cfqq_sync(__cfqq))
-			continue;
-
-		dist = cfq_dist_from_last(cfqd, __cfqq->next_rq);
-		if (first == -1)
-			first = dist;
-		if (dist < best) {
-			best = dist;
-			cfqq = __cfqq;
-		}
-	}
-
-	/*
-	 * Only async queue(s) available, grab first entry. Do the same
-	 * if the difference between the first and best isn't more than
-	 * twice, to obey fairness.
-	 */
-	if (!cfqq || (best && first != best && ((first / best) < 4)))
-		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
-
-	return cfqq;
-}
-
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = NULL;
 
-	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
+	if (!list_empty(&cfqd->cur_rr)) {
 		/*
-		 * if current list is non-empty, grab first entry. if it is
-		 * empty, get next prio level and grab first entry then if any
-		 * are spliced
+		 * if current list is non-empty, grab first entry.
 		 */
-		cfqq = cfq_get_best_queue(cfqd);
+		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+	} else if (!RB_EMPTY_ROOT(&cfqd->service_tree)) {
+		struct rb_node *n = rb_first(&cfqd->service_tree);
+
+		cfqq = rb_entry(n, struct cfq_queue, rb_node);
 	} else if (!list_empty(&cfqd->idle_rr)) {
 		/*
 		 * if we have idle queues and no rt or be queues had pending
@@ -861,27 +789,20 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq;
 
-	do {
-		long prio;
-
-		cfqq = cfq_get_next_queue(cfqd);
-		if (!cfqq)
-			break;
-
-		prio = cfq_prio_to_slice(cfqd, cfqq);
-		if (cfqq->slice_resid > -prio)
-			break;
-
-		cfqq->slice_resid += prio;
-		list_del_init(&cfqq->cfq_list);
-		list_add_tail(&cfqq->cfq_list, &cfqd->rr_list[cfqq->ioprio]);
-		cfqq = NULL;
-	} while (1);
-
+	cfqq = cfq_get_next_queue(cfqd);
 	__cfq_set_active_queue(cfqd, cfqq);
 	return cfqq;
 }
 
+static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
+					  struct request *rq)
+{
+	if (rq->sector >= cfqd->last_position)
+		return rq->sector - cfqd->last_position;
+	else
+		return cfqd->last_position - rq->sector;
+}
+
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
 {
 	struct cfq_io_context *cic = cfqd->active_cic;
@@ -892,45 +813,15 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
 	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
 }
 
-static struct cfq_queue *__cfq_close_cooperator(struct cfq_data *cfqd,
-						struct cfq_queue *cur_cfqq,
-						struct list_head *list)
-{
-	struct cfq_queue *cfqq;
-
-	list_for_each_entry(cfqq, list, cfq_list) {
-		if (cfqq == cur_cfqq || !cfq_cfqq_sync(cfqq))
-			continue;
-
-		BUG_ON(!cfqq->next_rq);
-
-		if (cfq_rq_close(cfqd, cfqq->next_rq))
-			return cfqq;
-	}
-
-	return NULL;
-}
-
-static int cfq_close_cooperator(struct cfq_data *cfqd,
-				struct cfq_queue *cur_cfqq)
+static int cfq_close_cooperator(struct cfq_data *cfq_data,
+				struct cfq_queue *cfqq)
 {
-	struct cfq_queue *cfqq;
-
-	if (!cfqd->busy_queues)
-		return 0;
-
 	/*
-	 * check cur_rr and same-prio rr_list for candidates
+	 * We should notice if some of the queues are cooperating, eg
+	 * working closely on the same area of the disk. In that case,
+	 * we can group them together and don't waste time idling.
 	 */
-	cfqq = __cfq_close_cooperator(cfqd, cur_cfqq, &cfqd->cur_rr);
-	if (cfqq)
-		return 1;
-
-	cfqq = __cfq_close_cooperator(cfqd, cur_cfqq, &cfqd->rr_list[cur_cfqq->ioprio]);
-	if (cfqq && (cfqq->rr_tick == cfqd->cur_rr_tick))
-		cfqq = NULL;
-
-	return cfqq != NULL;
+	return 0;
 }
 
 #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
@@ -974,7 +865,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 */
 	sl = cfqd->cfq_slice_idle;
 	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
-		sl = min(sl, msecs_to_jiffies(2));
+		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 }
@@ -1115,31 +1006,41 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return dispatched;
 }
 
-static int
-cfq_forced_dispatch_cfqqs(struct list_head *list)
+static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
+{
+	int dispatched = 0;
+
+	while (cfqq->next_rq) {
+		cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
+		dispatched++;
+	}
+
+	BUG_ON(!list_empty(&cfqq->fifo));
+	return dispatched;
+}
+
+static int cfq_forced_dispatch_cfqqs(struct list_head *list)
 {
 	struct cfq_queue *cfqq, *next;
 	int dispatched;
 
 	dispatched = 0;
-	list_for_each_entry_safe(cfqq, next, list, cfq_list) {
-		while (cfqq->next_rq) {
-			cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
-			dispatched++;
-		}
-		BUG_ON(!list_empty(&cfqq->fifo));
-	}
+	list_for_each_entry_safe(cfqq, next, list, cfq_list)
+		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
 
 	return dispatched;
 }
 
-static int
-cfq_forced_dispatch(struct cfq_data *cfqd)
+static int cfq_forced_dispatch(struct cfq_data *cfqd)
 {
-	int i, dispatched = 0;
+	int dispatched = 0;
+	struct rb_node *n;
+
+	while ((n = rb_first(&cfqd->service_tree)) != NULL) {
+		struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node);
 
-	for (i = 0; i < CFQ_PRIO_LISTS; i++)
-		dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);
+		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+	}
 
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
@@ -1151,8 +1052,7 @@ cfq_forced_dispatch(struct cfq_data *cfqd)
 	return dispatched;
 }
 
-static int
-cfq_dispatch_requests(request_queue_t *q, int force)
+static int cfq_dispatch_requests(request_queue_t *q, int force)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
@@ -1222,7 +1122,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	/*
 	 * it's on the empty list and still hashed
 	 */
-	list_del(&cfqq->cfq_list);
 	hlist_del(&cfqq->cfq_hash);
 	kmem_cache_free(cfq_pool, cfqq);
 }
@@ -1391,8 +1290,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 	 */
 	cfqq->org_ioprio = cfqq->ioprio;
 	cfqq->org_ioprio_class = cfqq->ioprio_class;
-
-	cfq_resort_rr_list(cfqq, 0);
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -1478,6 +1375,7 @@ retry:
 
 		INIT_HLIST_NODE(&cfqq->cfq_hash);
 		INIT_LIST_HEAD(&cfqq->cfq_list);
+		RB_CLEAR_NODE(&cfqq->rb_node);
 		INIT_LIST_HEAD(&cfqq->fifo);
 
 		cfqq->key = key;
@@ -1752,7 +1650,8 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * so we know that it will be selected next.
 	 */
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
-	list_move(&cfqq->cfq_list, &cfqd->cur_rr);
+	list_del_init(&cfqq->cfq_list);
+	list_add(&cfqq->cfq_list, &cfqd->cur_rr);
 
 	cfqq->slice_end = 0;
 	cfq_mark_cfqq_slice_new(cfqq);
@@ -1828,13 +1727,10 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
 	WARN_ON(!cfqq->dispatched);
 	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
-	cfqq->service_last = now;
 
 	if (!cfq_class_idle(cfqq))
 		cfqd->last_end_request = now;
 
-	cfq_resort_rr_list(cfqq, 0);
-
 	if (sync)
 		RQ_CIC(rq)->last_end_request = now;
 
@@ -1863,9 +1759,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
  */
 static void cfq_prio_boost(struct cfq_queue *cfqq)
 {
-	const int ioprio_class = cfqq->ioprio_class;
-	const int ioprio = cfqq->ioprio;
-
 	if (has_fs_excl()) {
 		/*
 		 * boost idle prio on transactions that would lock out other
@@ -1884,12 +1777,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		if (cfqq->ioprio != cfqq->org_ioprio)
 			cfqq->ioprio = cfqq->org_ioprio;
 	}
-
-	/*
-	 * refile between round-robin lists if we moved the priority class
-	 */
-	if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio))
-		cfq_resort_rr_list(cfqq, 0);
 }
 
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
@@ -2127,9 +2014,7 @@ static void *cfq_init_queue(request_queue_t *q)
 
 	memset(cfqd, 0, sizeof(*cfqd));
 
-	for (i = 0; i < CFQ_PRIO_LISTS; i++)
-		INIT_LIST_HEAD(&cfqd->rr_list[i]);
-
+	cfqd->service_tree = RB_ROOT;
 	INIT_LIST_HEAD(&cfqd->cur_rr);
 	INIT_LIST_HEAD(&cfqd->idle_rr);
 	INIT_LIST_HEAD(&cfqd->cic_list);