Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r--   block/cfq-iosched.c | 624
1 file changed, 381 insertions, 243 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9eba291eb6fd..ae21919f15e1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)		\
-	((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq)	(struct cfq_queue *) ((rq)->elevator_private2)
-#define RQ_CFQG(rq)	(struct cfq_group *) ((rq)->elevator_private3)
+	((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CFQQ(rq)	(struct cfq_queue *) ((rq)->elevator_private[1])
+#define RQ_CFQG(rq)	(struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
@@ -87,7 +87,6 @@ struct cfq_rb_root {
 	unsigned count;
 	unsigned total_weight;
 	u64 min_vdisktime;
-	struct rb_node *active;
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
 			.count = 0, .min_vdisktime = 0, }
@@ -97,7 +96,7 @@ struct cfq_rb_root {
  */
 struct cfq_queue {
 	/* reference count */
-	atomic_t ref;
+	int ref;
 	/* various state flags, see below */
 	unsigned int flags;
 	/* parent cfq_data */
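Note - not part of the patch: this hunk (and the matching cfq_group hunk further down) converts the CFQ reference counts from atomic_t to plain int; these counts are manipulated under the request_queue lock on the affected paths, so the atomics only added cost. A minimal sketch of the pattern, using a made-up 'obj' struct purely for illustration:

	/* Illustrative sketch only (hypothetical struct, not from this file). */
	struct obj {
		int ref;			/* protected by queue_lock */
	};

	static void obj_get(struct obj *o)
	{
		o->ref++;			/* was: atomic_inc(&o->ref) */
	}

	static void obj_put(struct obj *o)
	{
		BUG_ON(o->ref <= 0);
		o->ref--;			/* was: atomic_dec_and_test(&o->ref) */
		if (o->ref)
			return;
		kfree(o);
	}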
@@ -147,7 +146,6 @@ struct cfq_queue {
 	struct cfq_rb_root *service_tree;
 	struct cfq_queue *new_cfqq;
 	struct cfq_group *cfqg;
-	struct cfq_group *orig_cfqg;
 	/* Number of sectors dispatched from queue in single dispatch round */
 	unsigned long nr_sectors;
 };
@@ -160,6 +158,7 @@ enum wl_prio_t {
 	BE_WORKLOAD = 0,
 	RT_WORKLOAD = 1,
 	IDLE_WORKLOAD = 2,
+	CFQ_PRIO_NR,
 };
 
 /*
@@ -179,15 +178,25 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
 	unsigned int weight;
-	bool on_st;
+	unsigned int new_weight;
+	bool needs_update;
 
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
 
-	/* Per group busy queus average. Useful for workload slice calc. */
-	unsigned int busy_queues_avg[2];
 	/*
-	 * rr lists of queues with requests, onle rr for each priority class.
+	 * Per group busy queues average. Useful for workload slice calc. We
+	 * create the array for each prio class but at run time it is used
+	 * only for RT and BE class and slot for IDLE class remains unused.
+	 * This is primarily done to avoid confusion and a gcc warning.
+	 */
+	unsigned int busy_queues_avg[CFQ_PRIO_NR];
+	/*
+	 * rr lists of queues with requests. We maintain service trees for
+	 * RT and BE classes. These trees are subdivided in subclasses
+	 * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE
+	 * class there is no subclassification and all the cfq queues go on
+	 * a single tree service_tree_idle.
 	 * Counts are embedded in the cfq_rb_root
 	 */
 	struct cfq_rb_root service_trees[2][3];
@@ -199,7 +208,7 @@ struct cfq_group {
 	struct blkio_group blkg;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	struct hlist_node cfqd_node;
-	atomic_t ref;
+	int ref;
 #endif
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
@@ -221,7 +230,6 @@ struct cfq_data {
 	enum wl_type_t serving_type;
 	unsigned long workload_expires;
 	struct cfq_group *serving_group;
-	bool noidle_tree_requires_idle;
 
 	/*
 	 * Each priority tree is sorted by next_request position. These
@@ -231,6 +239,7 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
+	unsigned int busy_sync_queues;
 
 	int rq_in_driver;
 	int rq_in_flight[2];
@@ -278,7 +287,6 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
-	unsigned int cfq_group_isolation;
 
 	unsigned int cic_index;
 	struct list_head cic_list;
@@ -292,7 +300,9 @@ struct cfq_data {
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -359,16 +369,16 @@ CFQ_CFQQ_FNS(wait_busy);
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
 			cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
-			blkg_path(&(cfqq)->cfqg->blkg), ##args);
+			blkg_path(&(cfqq)->cfqg->blkg), ##args)
 
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "%s " fmt, \
-			blkg_path(&(cfqg)->blkg), ##args);      \
+			blkg_path(&(cfqg)->blkg), ##args)       \
 
 #else
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
-#define cfq_log_cfqg(cfqd, cfqg, fmt, args...)	do {} while (0);
+#define cfq_log_cfqg(cfqd, cfqg, fmt, args...)	do {} while (0)
 #endif
 #define cfq_log(cfqd, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
@@ -494,13 +504,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 	}
 }
 
-static int cfq_queue_empty(struct request_queue *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	return !cfqd->rq_queued;
-}
-
 /*
  * Scale schedule slice based on io priority. Use the sync time slice only
  * if a queue is marked sync and has sync io queued. A sync queue with async
@@ -551,20 +554,13 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
 
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
-	u64 vdisktime = st->min_vdisktime;
 	struct cfq_group *cfqg;
 
-	if (st->active) {
-		cfqg = rb_entry_cfqg(st->active);
-		vdisktime = cfqg->vdisktime;
-	}
-
 	if (st->left) {
 		cfqg = rb_entry_cfqg(st->left);
-		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
+		st->min_vdisktime = max_vdisktime(st->min_vdisktime,
+						  cfqg->vdisktime);
 	}
-
-	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
 }
 
 /*
@@ -596,8 +592,8 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	return cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
-static inline void
-cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static inline unsigned
+cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
 	if (cfqd->cfq_latency) {
@@ -623,6 +619,14 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 					low_slice);
 		}
 	}
+	return slice;
+}
+
+static inline void
+cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
+
 	cfqq->slice_start = jiffies;
 	cfqq->slice_end = jiffies + slice;
 	cfqq->allocated_slice = slice;
@@ -637,11 +641,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline bool cfq_slice_used(struct cfq_queue *cfqq)
 {
 	if (cfq_cfqq_slice_new(cfqq))
-		return 0;
+		return false;
 	if (time_before(jiffies, cfqq->slice_end))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
@@ -663,15 +667,11 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq2 == NULL)
 		return rq1;
 
-	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-		return rq1;
-	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-		return rq2;
-	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-		return rq1;
-	else if ((rq2->cmd_flags & REQ_META) &&
-		 !(rq1->cmd_flags & REQ_META))
-		return rq2;
+	if (rq_is_sync(rq1) != rq_is_sync(rq2))
+		return rq_is_sync(rq1) ? rq1 : rq2;
+
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
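Note - not part of the patch: the rewritten cfq_choose_req() above leans on a standard flag trick. (rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META is non-zero exactly when the two requests disagree on REQ_META, so one test replaces the earlier if/else-if pair; rq_is_sync(rq1) != rq_is_sync(rq2) plays the same role for the sync case. A small illustrative sketch of the equivalence:

	/* Sketch only: "prefer whichever request carries FLAG,
	 * when exactly one of them carries it."
	 */
	if ((a->cmd_flags ^ b->cmd_flags) & FLAG)	/* do they differ on FLAG? */
		return (a->cmd_flags & FLAG) ? a : b;
	/* equivalent to:
	 *	if ( (a->cmd_flags & FLAG) && !(b->cmd_flags & FLAG)) return a;
	 *	else if ((b->cmd_flags & FLAG) && !(a->cmd_flags & FLAG)) return b;
	 */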
@@ -853,20 +853,40 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 }
 
 static void
-cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_update_group_weight(struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+	if (cfqg->needs_update) {
+		cfqg->weight = cfqg->new_weight;
+		cfqg->needs_update = false;
+	}
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+
+	cfq_update_group_weight(cfqg);
+	__cfq_group_service_tree_add(st, cfqg);
+	st->total_weight += cfqg->weight;
+}
+
+static void
+cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 	struct cfq_group *__cfqg;
 	struct rb_node *n;
 
 	cfqg->nr_cfqq++;
-	if (cfqg->on_st)
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
 		return;
 
 	/*
 	 * Currently put the group at the end. Later implement something
 	 * so that groups get lesser vtime based on their weights, so that
-	 * if group does not loose all if it was not continously backlogged.
+	 * if group does not loose all if it was not continuously backlogged.
 	 */
 	n = rb_last(&st->rb);
 	if (n) {
@@ -874,20 +894,22 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
 	} else
 		cfqg->vdisktime = st->min_vdisktime;
+	cfq_group_service_tree_add(st, cfqg);
+}
 
-	__cfq_group_service_tree_add(st, cfqg);
-	cfqg->on_st = true;
-	st->total_weight += cfqg->weight;
+static void
+cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	st->total_weight -= cfqg->weight;
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
+		cfq_rb_erase(&cfqg->rb_node, st);
 }
 
 static void
-cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
-	if (st->active == &cfqg->rb_node)
-		st->active = NULL;
-
 	BUG_ON(cfqg->nr_cfqq < 1);
 	cfqg->nr_cfqq--;
 
@@ -896,15 +918,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 
 	cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-	cfqg->on_st = false;
-	st->total_weight -= cfqg->weight;
-	if (!RB_EMPTY_NODE(&cfqg->rb_node))
-		cfq_rb_erase(&cfqg->rb_node, st);
+	cfq_group_service_tree_del(st, cfqg);
 	cfqg->saved_workload_slice = 0;
 	cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
 }
 
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+						unsigned int *unaccounted_time)
 {
 	unsigned int slice_used;
 
@@ -923,8 +943,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
 				1);
 	} else {
 		slice_used = jiffies - cfqq->slice_start;
-		if (slice_used > cfqq->allocated_slice)
+		if (slice_used > cfqq->allocated_slice) {
+			*unaccounted_time = slice_used - cfqq->allocated_slice;
 			slice_used = cfqq->allocated_slice;
+		}
+		if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+			*unaccounted_time += cfqq->slice_start -
+					cfqq->dispatch_start;
 	}
 
 	return slice_used;
@@ -934,12 +959,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 				struct cfq_queue *cfqq)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
-	unsigned int used_sl, charge;
+	unsigned int used_sl, charge, unaccounted_sl = 0;
 	int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
 			- cfqg->service_tree_idle.count;
 
 	BUG_ON(nr_sync < 0);
-	used_sl = charge = cfq_cfqq_slice_usage(cfqq);
+	used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
 
 	if (iops_mode(cfqd))
 		charge = cfqq->slice_dispatch;
@@ -947,9 +972,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 		charge = cfqq->allocated_slice;
 
 	/* Can't update vdisktime while group is on service tree */
-	cfq_rb_erase(&cfqg->rb_node, st);
+	cfq_group_service_tree_del(st, cfqg);
 	cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
-	__cfq_group_service_tree_add(st, cfqg);
+	/* If a new weight was requested, update now, off tree */
+	cfq_group_service_tree_add(st, cfqg);
 
 	/* This group is being expired. Save the context */
 	if (time_after(cfqd->workload_expires, jiffies)) {
@@ -962,10 +988,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
 	cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
 					st->min_vdisktime);
-	cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
-			" sect=%u", used_sl, cfqq->slice_dispatch, charge,
-			iops_mode(cfqd), cfqq->nr_sectors);
-	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+	cfq_log_cfqq(cfqq->cfqd, cfqq,
+		     "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+		     used_sl, cfqq->slice_dispatch, charge,
+		     iops_mode(cfqd), cfqq->nr_sectors);
+	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
+					  unaccounted_sl);
 	cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
 }
 
@@ -977,35 +1005,55 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
 	return NULL;
 }
 
-void
-cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
+void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+					unsigned int weight)
 {
-	cfqg_of_blkg(blkg)->weight = weight;
+	struct cfq_group *cfqg = cfqg_of_blkg(blkg);
+	cfqg->new_weight = weight;
+	cfqg->needs_update = true;
 }
 
-static struct cfq_group *
-cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+			struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
-	struct cfq_group *cfqg = NULL;
-	void *key = cfqd;
-	int i, j;
-	struct cfq_rb_root *st;
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+	/*
+	 * Add group onto cgroup list. It might happen that bdi->dev is
+	 * not initialized yet. Initialize this new group without major
+	 * and minor info and this info will be filled in once a new thread
+	 * comes for IO.
+	 */
+	if (bdi->dev) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-	if (cfqg || !create)
-		goto done;
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
					(void *)cfqd, MKDEV(major, minor));
+	} else
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
					(void *)cfqd, 0);
+
+	cfqd->nr_blkcg_linked_grps++;
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+	/* Add group on cfqd list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_group *cfqg = NULL;
+	int i, j, ret;
+	struct cfq_rb_root *st;
 
 	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
 	if (!cfqg)
-		goto done;
+		return NULL;
 
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
@@ -1017,52 +1065,103 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	 * elevator which will be dropped by either elevator exit
 	 * or cgroup deletion path depending on who is exiting first.
 	 */
-	atomic_set(&cfqg->ref, 1);
+	cfqg->ref = 1;
+
+	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+	if (ret) {
+		kfree(cfqg);
+		return NULL;
+	}
+
+	return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+	struct cfq_group *cfqg = NULL;
+	void *key = cfqd;
+	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	unsigned int major, minor;
 
 	/*
-	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initiliazed yet. Initialize this new group without major
-	 * and minor info and this info will be filled in once a new thread
-	 * comes for IO. See code above.
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	if (bdi->dev) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					0);
-
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+	if (blkcg == &blkio_root_cgroup)
+		cfqg = &cfqd->root_group;
+	else
+		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
 
-	/* Add group on cfqd list */
-	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
 
-done:
 	return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
-	struct cgroup *cgroup;
-	struct cfq_group *cfqg = NULL;
+	struct blkio_cgroup *blkcg;
+	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+	struct request_queue *q = cfqd->queue;
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+	cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg) {
+		rcu_read_unlock();
+		return cfqg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc.
+	 *
+	 * Not taking any queue reference here and assuming that queue is
+	 * around by the time we return. CFQ queue allocation code does
+	 * the same. It might be racy though.
+	 */
+
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	cfqg = cfq_alloc_cfqg(cfqd);
+
+	spin_lock_irq(q->queue_lock);
 
 	rcu_read_lock();
-	cgroup = task_cgroup(current, blkio_subsys_id);
-	cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
-	if (!cfqg && create)
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+	if (__cfqg) {
+		kfree(cfqg);
+		rcu_read_unlock();
+		return __cfqg;
+	}
+
+	if (!cfqg)
 		cfqg = &cfqd->root_group;
+
+	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	rcu_read_unlock();
 	return cfqg;
 }
 
 static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
 {
-	atomic_inc(&cfqg->ref);
+	cfqg->ref++;
 	return cfqg;
 }
 
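Note - not part of the patch: the reworked cfq_get_cfqg() above follows the familiar "drop the lock to allocate, retake it, then re-check for a racing allocation" shape, needed because the per-cpu stats allocation can sleep. A stripped-down sketch of that flow (alloc_group/find_group/add_group are generic placeholder names, not kernel functions):

	/* Illustrative sketch only: a sleeping allocation must not happen
	 * under the queue spinlock, so drop it, allocate, re-lock, re-check.
	 */
	spin_unlock_irq(q->queue_lock);
	new = alloc_group();			/* may sleep */
	spin_lock_irq(q->queue_lock);

	existing = find_group();		/* did someone beat us to it? */
	if (existing) {
		kfree(new);			/* lose the race gracefully */
		return existing;
	}
	add_group(new);
	return new;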
@@ -1074,7 +1173,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 
 	cfqq->cfqg = cfqg;
 	/* cfqq reference on cfqg */
-	atomic_inc(&cfqq->cfqg->ref);
+	cfqq->cfqg->ref++;
 }
 
 static void cfq_put_cfqg(struct cfq_group *cfqg)
@@ -1082,11 +1181,13 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 	struct cfq_rb_root *st;
 	int i, j;
 
-	BUG_ON(atomic_read(&cfqg->ref) <= 0);
-	if (!atomic_dec_and_test(&cfqg->ref))
+	BUG_ON(cfqg->ref <= 0);
+	cfqg->ref--;
+	if (cfqg->ref)
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
-		BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+	free_percpu(cfqg->blkg.stats_cpu);
 	kfree(cfqg);
 }
 
@@ -1145,7 +1246,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	return &cfqd->root_group;
 }
@@ -1179,33 +1280,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rb_root *service_tree;
 	int left;
 	int new_cfqq = 1;
-	int group_changed = 0;
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-	if (!cfqd->cfq_group_isolation
-	    && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
-	    && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
-		/* Move this cfq to root group */
-		cfq_log_cfqq(cfqd, cfqq, "moving to root group");
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfqq->orig_cfqg = cfqq->cfqg;
-		cfqq->cfqg = &cfqd->root_group;
-		atomic_inc(&cfqd->root_group.ref);
-		group_changed = 1;
-	} else if (!cfqd->cfq_group_isolation
-		   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
-		/* cfqq is sequential now needs to go to its original group */
-		BUG_ON(cfqq->cfqg != &cfqd->root_group);
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfq_put_cfqg(cfqq->cfqg);
-		cfqq->cfqg = cfqq->orig_cfqg;
-		cfqq->orig_cfqg = NULL;
-		group_changed = 1;
-		cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
-	}
-#endif
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
@@ -1276,9 +1350,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
-	if ((add_front || !new_cfqq) && !group_changed)
+	if (add_front || !new_cfqq)
 		return;
-	cfq_group_service_tree_add(cfqd, cfqq->cfqg);
+	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
 
 static struct cfq_queue *
@@ -1366,6 +1440,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	if (cfq_cfqq_sync(cfqq))
+		cfqd->busy_sync_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -1389,9 +1465,11 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfqq->p_root = NULL;
 	}
 
-	cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+	cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_cfqq_sync(cfqq))
+		cfqd->busy_sync_queues--;
 }
 
 /*
@@ -1663,8 +1741,11 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	/*
 	 * store what was left of this slice, if the queue idled/timed out
 	 */
-	if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
-		cfqq->slice_resid = cfqq->slice_end - jiffies;
+	if (timed_out) {
+		if (cfq_cfqq_slice_new(cfqq))
+			cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
+		else
+			cfqq->slice_resid = cfqq->slice_end - jiffies;
 		cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
 	}
 
@@ -1678,9 +1759,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfqq == cfqd->active_queue)
 		cfqd->active_queue = NULL;
 
-	if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
-		cfqd->grp_service_tree.active = NULL;
-
 	if (cfqd->active_cic) {
 		put_io_context(cfqd->active_cic->ioc);
 		cfqd->active_cic = NULL;
@@ -1892,10 +1970,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * in their service tree.
 	 */
 	if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
-		return 1;
+		return true;
 	cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
 			service_tree->count);
-	return 0;
+	return false;
 }
 
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -1946,8 +2024,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 */
 	if (sample_valid(cic->ttime_samples) &&
 	    (cfqq->slice_end - jiffies < cic->ttime_mean)) {
-		cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
+		cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
			     cic->ttime_mean);
 		return;
 	}
 
@@ -2020,7 +2098,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+	return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
 }
 
 /*
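Note - not part of the patch: the old and new return expressions in cfq_prio_to_maxrq() are equivalent, since cfq-iosched.c defines CFQ_PRIO_LISTS as IOPRIO_BE_NR (8). Worked out:

	2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio))
	  = 2 * base_rq * (1 + CFQ_PRIO_LISTS - 1 - ioprio)
	  = 2 * base_rq * (CFQ_PRIO_LISTS - ioprio)
	  = 2 * base_rq * (IOPRIO_BE_NR - ioprio)

so this particular change reads as a simplification rather than a behavioural one.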
@@ -2031,7 +2109,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq) | |||
2031 | int process_refs, io_refs; | 2109 | int process_refs, io_refs; |
2032 | 2110 | ||
2033 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; | 2111 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; |
2034 | process_refs = atomic_read(&cfqq->ref) - io_refs; | 2112 | process_refs = cfqq->ref - io_refs; |
2035 | BUG_ON(process_refs < 0); | 2113 | BUG_ON(process_refs < 0); |
2036 | return process_refs; | 2114 | return process_refs; |
2037 | } | 2115 | } |
@@ -2071,10 +2149,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) | |||
2071 | */ | 2149 | */ |
2072 | if (new_process_refs >= process_refs) { | 2150 | if (new_process_refs >= process_refs) { |
2073 | cfqq->new_cfqq = new_cfqq; | 2151 | cfqq->new_cfqq = new_cfqq; |
2074 | atomic_add(process_refs, &new_cfqq->ref); | 2152 | new_cfqq->ref += process_refs; |
2075 | } else { | 2153 | } else { |
2076 | new_cfqq->new_cfqq = cfqq; | 2154 | new_cfqq->new_cfqq = cfqq; |
2077 | atomic_add(new_process_refs, &cfqq->ref); | 2155 | cfqq->ref += new_process_refs; |
2078 | } | 2156 | } |
2079 | } | 2157 | } |
2080 | 2158 | ||
@@ -2107,12 +2185,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2107 | unsigned count; | 2185 | unsigned count; |
2108 | struct cfq_rb_root *st; | 2186 | struct cfq_rb_root *st; |
2109 | unsigned group_slice; | 2187 | unsigned group_slice; |
2110 | 2188 | enum wl_prio_t original_prio = cfqd->serving_prio; | |
2111 | if (!cfqg) { | ||
2112 | cfqd->serving_prio = IDLE_WORKLOAD; | ||
2113 | cfqd->workload_expires = jiffies + 1; | ||
2114 | return; | ||
2115 | } | ||
2116 | 2189 | ||
2117 | /* Choose next priority. RT > BE > IDLE */ | 2190 | /* Choose next priority. RT > BE > IDLE */ |
2118 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) | 2191 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) |
@@ -2125,6 +2198,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2125 | return; | 2198 | return; |
2126 | } | 2199 | } |
2127 | 2200 | ||
2201 | if (original_prio != cfqd->serving_prio) | ||
2202 | goto new_workload; | ||
2203 | |||
2128 | /* | 2204 | /* |
2129 | * For RT and BE, we have to choose also the type | 2205 | * For RT and BE, we have to choose also the type |
2130 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload | 2206 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload |
@@ -2139,6 +2215,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2139 | if (count && !time_after(jiffies, cfqd->workload_expires)) | 2215 | if (count && !time_after(jiffies, cfqd->workload_expires)) |
2140 | return; | 2216 | return; |
2141 | 2217 | ||
2218 | new_workload: | ||
2142 | /* otherwise select new workload type */ | 2219 | /* otherwise select new workload type */ |
2143 | cfqd->serving_type = | 2220 | cfqd->serving_type = |
2144 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); | 2221 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); |
@@ -2180,7 +2257,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2180 | slice = max_t(unsigned, slice, CFQ_MIN_TT); | 2257 | slice = max_t(unsigned, slice, CFQ_MIN_TT); |
2181 | cfq_log(cfqd, "workload slice:%d", slice); | 2258 | cfq_log(cfqd, "workload slice:%d", slice); |
2182 | cfqd->workload_expires = jiffies + slice; | 2259 | cfqd->workload_expires = jiffies + slice; |
2183 | cfqd->noidle_tree_requires_idle = false; | ||
2184 | } | 2260 | } |
2185 | 2261 | ||
2186 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | 2262 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) |
@@ -2191,7 +2267,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | |||
2191 | if (RB_EMPTY_ROOT(&st->rb)) | 2267 | if (RB_EMPTY_ROOT(&st->rb)) |
2192 | return NULL; | 2268 | return NULL; |
2193 | cfqg = cfq_rb_first_group(st); | 2269 | cfqg = cfq_rb_first_group(st); |
2194 | st->active = &cfqg->rb_node; | ||
2195 | update_min_vdisktime(st); | 2270 | update_min_vdisktime(st); |
2196 | return cfqg; | 2271 | return cfqg; |
2197 | } | 2272 | } |
@@ -2285,6 +2360,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
2285 | goto keep_queue; | 2360 | goto keep_queue; |
2286 | } | 2361 | } |
2287 | 2362 | ||
2363 | /* | ||
2364 | * This is a deep seek queue, but the device is much faster than | ||
2365 | * the queue can deliver, don't idle | ||
2366 | **/ | ||
2367 | if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && | ||
2368 | (cfq_cfqq_slice_new(cfqq) || | ||
2369 | (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { | ||
2370 | cfq_clear_cfqq_deep(cfqq); | ||
2371 | cfq_clear_cfqq_idle_window(cfqq); | ||
2372 | } | ||
2373 | |||
2288 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { | 2374 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { |
2289 | cfqq = NULL; | 2375 | cfqq = NULL; |
2290 | goto keep_queue; | 2376 | goto keep_queue; |
@@ -2359,12 +2445,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, | |||
2359 | { | 2445 | { |
2360 | /* the queue hasn't finished any request, can't estimate */ | 2446 | /* the queue hasn't finished any request, can't estimate */ |
2361 | if (cfq_cfqq_slice_new(cfqq)) | 2447 | if (cfq_cfqq_slice_new(cfqq)) |
2362 | return 1; | 2448 | return true; |
2363 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, | 2449 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, |
2364 | cfqq->slice_end)) | 2450 | cfqq->slice_end)) |
2365 | return 1; | 2451 | return true; |
2366 | 2452 | ||
2367 | return 0; | 2453 | return false; |
2368 | } | 2454 | } |
2369 | 2455 | ||
2370 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2456 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
@@ -2391,6 +2477,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2391 | * Does this cfqq already have too much IO in flight? | 2477 | * Does this cfqq already have too much IO in flight? |
2392 | */ | 2478 | */ |
2393 | if (cfqq->dispatched >= max_dispatch) { | 2479 | if (cfqq->dispatched >= max_dispatch) { |
2480 | bool promote_sync = false; | ||
2394 | /* | 2481 | /* |
2395 | * idle queue must always only have a single IO in flight | 2482 | * idle queue must always only have a single IO in flight |
2396 | */ | 2483 | */ |
@@ -2398,15 +2485,26 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2398 | return false; | 2485 | return false; |
2399 | 2486 | ||
2400 | /* | 2487 | /* |
2488 | * If there is only one sync queue | ||
2489 | * we can ignore async queue here and give the sync | ||
2490 | * queue no dispatch limit. The reason is a sync queue can | ||
2491 | * preempt async queue, limiting the sync queue doesn't make | ||
2492 | * sense. This is useful for aiostress test. | ||
2493 | */ | ||
2494 | if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1) | ||
2495 | promote_sync = true; | ||
2496 | |||
2497 | /* | ||
2401 | * We have other queues, don't allow more IO from this one | 2498 | * We have other queues, don't allow more IO from this one |
2402 | */ | 2499 | */ |
2403 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq)) | 2500 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) && |
2501 | !promote_sync) | ||
2404 | return false; | 2502 | return false; |
2405 | 2503 | ||
2406 | /* | 2504 | /* |
2407 | * Sole queue user, no limit | 2505 | * Sole queue user, no limit |
2408 | */ | 2506 | */ |
2409 | if (cfqd->busy_queues == 1) | 2507 | if (cfqd->busy_queues == 1 || promote_sync) |
2410 | max_dispatch = -1; | 2508 | max_dispatch = -1; |
2411 | else | 2509 | else |
2412 | /* | 2510 | /* |
@@ -2528,18 +2626,18 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) | |||
2528 | static void cfq_put_queue(struct cfq_queue *cfqq) | 2626 | static void cfq_put_queue(struct cfq_queue *cfqq) |
2529 | { | 2627 | { |
2530 | struct cfq_data *cfqd = cfqq->cfqd; | 2628 | struct cfq_data *cfqd = cfqq->cfqd; |
2531 | struct cfq_group *cfqg, *orig_cfqg; | 2629 | struct cfq_group *cfqg; |
2532 | 2630 | ||
2533 | BUG_ON(atomic_read(&cfqq->ref) <= 0); | 2631 | BUG_ON(cfqq->ref <= 0); |
2534 | 2632 | ||
2535 | if (!atomic_dec_and_test(&cfqq->ref)) | 2633 | cfqq->ref--; |
2634 | if (cfqq->ref) | ||
2536 | return; | 2635 | return; |
2537 | 2636 | ||
2538 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); | 2637 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); |
2539 | BUG_ON(rb_first(&cfqq->sort_list)); | 2638 | BUG_ON(rb_first(&cfqq->sort_list)); |
2540 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); | 2639 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); |
2541 | cfqg = cfqq->cfqg; | 2640 | cfqg = cfqq->cfqg; |
2542 | orig_cfqg = cfqq->orig_cfqg; | ||
2543 | 2641 | ||
2544 | if (unlikely(cfqd->active_queue == cfqq)) { | 2642 | if (unlikely(cfqd->active_queue == cfqq)) { |
2545 | __cfq_slice_expired(cfqd, cfqq, 0); | 2643 | __cfq_slice_expired(cfqd, cfqq, 0); |
@@ -2549,33 +2647,23 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2549 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 2647 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
2550 | kmem_cache_free(cfq_pool, cfqq); | 2648 | kmem_cache_free(cfq_pool, cfqq); |
2551 | cfq_put_cfqg(cfqg); | 2649 | cfq_put_cfqg(cfqg); |
2552 | if (orig_cfqg) | ||
2553 | cfq_put_cfqg(orig_cfqg); | ||
2554 | } | 2650 | } |
2555 | 2651 | ||
2556 | /* | 2652 | /* |
2557 | * Must always be called with the rcu_read_lock() held | 2653 | * Call func for each cic attached to this ioc. |
2558 | */ | 2654 | */ |
2559 | static void | 2655 | static void |
2560 | __call_for_each_cic(struct io_context *ioc, | 2656 | call_for_each_cic(struct io_context *ioc, |
2561 | void (*func)(struct io_context *, struct cfq_io_context *)) | 2657 | void (*func)(struct io_context *, struct cfq_io_context *)) |
2562 | { | 2658 | { |
2563 | struct cfq_io_context *cic; | 2659 | struct cfq_io_context *cic; |
2564 | struct hlist_node *n; | 2660 | struct hlist_node *n; |
2565 | 2661 | ||
2662 | rcu_read_lock(); | ||
2663 | |||
2566 | hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) | 2664 | hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) |
2567 | func(ioc, cic); | 2665 | func(ioc, cic); |
2568 | } | ||
2569 | 2666 | ||
2570 | /* | ||
2571 | * Call func for each cic attached to this ioc. | ||
2572 | */ | ||
2573 | static void | ||
2574 | call_for_each_cic(struct io_context *ioc, | ||
2575 | void (*func)(struct io_context *, struct cfq_io_context *)) | ||
2576 | { | ||
2577 | rcu_read_lock(); | ||
2578 | __call_for_each_cic(ioc, func); | ||
2579 | rcu_read_unlock(); | 2667 | rcu_read_unlock(); |
2580 | } | 2668 | } |
2581 | 2669 | ||
@@ -2636,7 +2724,7 @@ static void cfq_free_io_context(struct io_context *ioc) | |||
2636 | * should be ok to iterate over the known list, we will see all cic's | 2724 | * should be ok to iterate over the known list, we will see all cic's |
2637 | * since no new ones are added. | 2725 | * since no new ones are added. |
2638 | */ | 2726 | */ |
2639 | __call_for_each_cic(ioc, cic_free_func); | 2727 | call_for_each_cic(ioc, cic_free_func); |
2640 | } | 2728 | } |
2641 | 2729 | ||
2642 | static void cfq_put_cooperator(struct cfq_queue *cfqq) | 2730 | static void cfq_put_cooperator(struct cfq_queue *cfqq) |
@@ -2685,8 +2773,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
2685 | smp_wmb(); | 2773 | smp_wmb(); |
2686 | cic->key = cfqd_dead_key(cfqd); | 2774 | cic->key = cfqd_dead_key(cfqd); |
2687 | 2775 | ||
2688 | if (ioc->ioc_data == cic) | 2776 | rcu_read_lock(); |
2777 | if (rcu_dereference(ioc->ioc_data) == cic) { | ||
2778 | rcu_read_unlock(); | ||
2779 | spin_lock(&ioc->lock); | ||
2689 | rcu_assign_pointer(ioc->ioc_data, NULL); | 2780 | rcu_assign_pointer(ioc->ioc_data, NULL); |
2781 | spin_unlock(&ioc->lock); | ||
2782 | } else | ||
2783 | rcu_read_unlock(); | ||
2690 | 2784 | ||
2691 | if (cic->cfqq[BLK_RW_ASYNC]) { | 2785 | if (cic->cfqq[BLK_RW_ASYNC]) { |
2692 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); | 2786 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); |
@@ -2835,7 +2929,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2835 | RB_CLEAR_NODE(&cfqq->p_node); | 2929 | RB_CLEAR_NODE(&cfqq->p_node); |
2836 | INIT_LIST_HEAD(&cfqq->fifo); | 2930 | INIT_LIST_HEAD(&cfqq->fifo); |
2837 | 2931 | ||
2838 | atomic_set(&cfqq->ref, 0); | 2932 | cfqq->ref = 0; |
2839 | cfqq->cfqd = cfqd; | 2933 | cfqq->cfqd = cfqd; |
2840 | 2934 | ||
2841 | cfq_mark_cfqq_prio_changed(cfqq); | 2935 | cfq_mark_cfqq_prio_changed(cfqq); |
@@ -2892,7 +2986,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, | |||
2892 | struct cfq_group *cfqg; | 2986 | struct cfq_group *cfqg; |
2893 | 2987 | ||
2894 | retry: | 2988 | retry: |
2895 | cfqg = cfq_get_cfqg(cfqd, 1); | 2989 | cfqg = cfq_get_cfqg(cfqd); |
2896 | cic = cfq_cic_lookup(cfqd, ioc); | 2990 | cic = cfq_cic_lookup(cfqd, ioc); |
2897 | /* cic always exists here */ | 2991 | /* cic always exists here */ |
2898 | cfqq = cic_to_cfqq(cic, is_sync); | 2992 | cfqq = cic_to_cfqq(cic, is_sync); |
@@ -2971,11 +3065,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, | |||
2971 | * pin the queue now that it's allocated, scheduler exit will prune it | 3065 | * pin the queue now that it's allocated, scheduler exit will prune it |
2972 | */ | 3066 | */ |
2973 | if (!is_sync && !(*async_cfqq)) { | 3067 | if (!is_sync && !(*async_cfqq)) { |
2974 | atomic_inc(&cfqq->ref); | 3068 | cfqq->ref++; |
2975 | *async_cfqq = cfqq; | 3069 | *async_cfqq = cfqq; |
2976 | } | 3070 | } |
2977 | 3071 | ||
2978 | atomic_inc(&cfqq->ref); | 3072 | cfqq->ref++; |
2979 | return cfqq; | 3073 | return cfqq; |
2980 | } | 3074 | } |
2981 | 3075 | ||
@@ -2993,7 +3087,8 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, | |||
2993 | 3087 | ||
2994 | spin_lock_irqsave(&ioc->lock, flags); | 3088 | spin_lock_irqsave(&ioc->lock, flags); |
2995 | 3089 | ||
2996 | BUG_ON(ioc->ioc_data == cic); | 3090 | BUG_ON(rcu_dereference_check(ioc->ioc_data, |
3091 | lockdep_is_held(&ioc->lock)) == cic); | ||
2997 | 3092 | ||
2998 | radix_tree_delete(&ioc->radix_root, cfqd->cic_index); | 3093 | radix_tree_delete(&ioc->radix_root, cfqd->cic_index); |
2999 | hlist_del_rcu(&cic->cic_list); | 3094 | hlist_del_rcu(&cic->cic_list); |
@@ -3177,7 +3272,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3177 | if (cfqq->queued[0] + cfqq->queued[1] >= 4) | 3272 | if (cfqq->queued[0] + cfqq->queued[1] >= 4) |
3178 | cfq_mark_cfqq_deep(cfqq); | 3273 | cfq_mark_cfqq_deep(cfqq); |
3179 | 3274 | ||
3180 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | 3275 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) |
3276 | enable_idle = 0; | ||
3277 | else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | ||
3181 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | 3278 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) |
3182 | enable_idle = 0; | 3279 | enable_idle = 0; |
3183 | else if (sample_valid(cic->ttime_samples)) { | 3280 | else if (sample_valid(cic->ttime_samples)) { |
@@ -3255,6 +3352,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
3255 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) | 3352 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) |
3256 | return true; | 3353 | return true; |
3257 | 3354 | ||
3355 | /* An idle queue should not be idle now for some reason */ | ||
3356 | if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) | ||
3357 | return true; | ||
3358 | |||
3258 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) | 3359 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) |
3259 | return false; | 3360 | return false; |
3260 | 3361 | ||
@@ -3274,10 +3375,19 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
3274 | */ | 3375 | */ |
3275 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 3376 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
3276 | { | 3377 | { |
3378 | struct cfq_queue *old_cfqq = cfqd->active_queue; | ||
3379 | |||
3277 | cfq_log_cfqq(cfqd, cfqq, "preempt"); | 3380 | cfq_log_cfqq(cfqd, cfqq, "preempt"); |
3278 | cfq_slice_expired(cfqd, 1); | 3381 | cfq_slice_expired(cfqd, 1); |
3279 | 3382 | ||
3280 | /* | 3383 | /* |
3384 | * workload type is changed, don't save slice, otherwise preempt | ||
3385 | * doesn't happen | ||
3386 | */ | ||
3387 | if (cfqq_type(old_cfqq) != cfqq_type(cfqq)) | ||
3388 | cfqq->cfqg->saved_workload_slice = 0; | ||
3389 | |||
3390 | /* | ||
3281 | * Put the new queue at the front of the of the current list, | 3391 | * Put the new queue at the front of the of the current list, |
3282 | * so we know that it will be selected next. | 3392 | * so we know that it will be selected next. |
3283 | */ | 3393 | */ |
@@ -3402,6 +3512,10 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
3402 | { | 3512 | { |
3403 | struct cfq_io_context *cic = cfqd->active_cic; | 3513 | struct cfq_io_context *cic = cfqd->active_cic; |
3404 | 3514 | ||
3515 | /* If the queue already has requests, don't wait */ | ||
3516 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) | ||
3517 | return false; | ||
3518 | |||
3405 | /* If there are other queues in the group, don't wait */ | 3519 | /* If there are other queues in the group, don't wait */ |
3406 | if (cfqq->cfqg->nr_cfqq > 1) | 3520 | if (cfqq->cfqg->nr_cfqq > 1) |
3407 | return false; | 3521 | return false; |
@@ -3494,17 +3608,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
3494 | cfq_slice_expired(cfqd, 1); | 3608 | cfq_slice_expired(cfqd, 1); |
3495 | else if (sync && cfqq_empty && | 3609 | else if (sync && cfqq_empty && |
3496 | !cfq_close_cooperator(cfqd, cfqq)) { | 3610 | !cfq_close_cooperator(cfqd, cfqq)) { |
3497 | cfqd->noidle_tree_requires_idle |= | 3611 | cfq_arm_slice_timer(cfqd); |
3498 | !(rq->cmd_flags & REQ_NOIDLE); | ||
3499 | /* | ||
3500 | * Idling is enabled for SYNC_WORKLOAD. | ||
3501 | * SYNC_NOIDLE_WORKLOAD idles at the end of the tree | ||
3502 | * only if we processed at least one !REQ_NOIDLE request | ||
3503 | */ | ||
3504 | if (cfqd->serving_type == SYNC_WORKLOAD | ||
3505 | || cfqd->noidle_tree_requires_idle | ||
3506 | || cfqq->cfqg->nr_cfqq == 1) | ||
3507 | cfq_arm_slice_timer(cfqd); | ||
3508 | } | 3612 | } |
3509 | } | 3613 | } |
3510 | 3614 | ||
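With the SYNC_NOIDLE bookkeeping removed, the completion path no longer second-guesses the idling policy; it simply calls cfq_arm_slice_timer() and lets that helper decide whether idling is worthwhile. The shape of the refactor, sketched with hypothetical helper names:

#include <stdbool.h>

/* Hypothetical helper owning the whole idling policy in one place. */
static bool idling_is_worthwhile(void)
{
	/* workload type, slice_idle setting, group state, ... */
	return true;
}

static void arm_idle_timer_sketch(void)
{
	if (!idling_is_worthwhile())
		return;			/* decision lives here, not at call sites */
	/* start the idle timer */
}

static void on_request_completed_sketch(bool sync, bool queue_now_empty)
{
	if (sync && queue_now_empty)
		arm_idle_timer_sketch();	/* no duplicated policy checks here */
}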
@@ -3589,12 +3693,12 @@ static void cfq_put_request(struct request *rq) | |||
3589 | 3693 | ||
3590 | put_io_context(RQ_CIC(rq)->ioc); | 3694 | put_io_context(RQ_CIC(rq)->ioc); |
3591 | 3695 | ||
3592 | rq->elevator_private = NULL; | 3696 | rq->elevator_private[0] = NULL; |
3593 | rq->elevator_private2 = NULL; | 3697 | rq->elevator_private[1] = NULL; |
3594 | 3698 | ||
3595 | /* Put down rq reference on cfqg */ | 3699 | /* Put down rq reference on cfqg */ |
3596 | cfq_put_cfqg(RQ_CFQG(rq)); | 3700 | cfq_put_cfqg(RQ_CFQG(rq)); |
3597 | rq->elevator_private3 = NULL; | 3701 | rq->elevator_private[2] = NULL; |
3598 | 3702 | ||
3599 | cfq_put_queue(cfqq); | 3703 | cfq_put_queue(cfqq); |
3600 | } | 3704 | } |
@@ -3681,19 +3785,15 @@ new_queue: | |||
3681 | } | 3785 | } |
3682 | 3786 | ||
3683 | cfqq->allocated[rw]++; | 3787 | cfqq->allocated[rw]++; |
3684 | atomic_inc(&cfqq->ref); | ||
3685 | 3788 | ||
3789 | cfqq->ref++; | ||
3790 | rq->elevator_private[0] = cic; | ||
3791 | rq->elevator_private[1] = cfqq; | ||
3792 | rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3686 | spin_unlock_irqrestore(q->queue_lock, flags); | 3793 | spin_unlock_irqrestore(q->queue_lock, flags); |
3687 | |||
3688 | rq->elevator_private = cic; | ||
3689 | rq->elevator_private2 = cfqq; | ||
3690 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3691 | return 0; | 3794 | return 0; |
3692 | 3795 | ||
3693 | queue_fail: | 3796 | queue_fail: |
3694 | if (cic) | ||
3695 | put_io_context(cic->ioc); | ||
3696 | |||
3697 | cfq_schedule_dispatch(cfqd); | 3797 | cfq_schedule_dispatch(cfqd); |
3698 | spin_unlock_irqrestore(q->queue_lock, flags); | 3798 | spin_unlock_irqrestore(q->queue_lock, flags); |
3699 | cfq_log(cfqd, "set_request fail"); | 3799 | cfq_log(cfqd, "set_request fail"); |
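The reordered set_request path takes the cfqq reference and fills all three rq->elevator_private[] slots while queue_lock is still held, so a request never becomes visible with unset private pointers or an unaccounted reference; the old code did this after the unlock. A small pthread sketch of the same publish-under-the-lock pattern, with hypothetical names:

#include <pthread.h>

struct queue_ctx { int ref; };            /* plain int ref, guarded by the lock */
struct request_sketch { void *priv[3]; }; /* stands in for elevator_private[] */

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void set_request_sketch(struct request_sketch *rq, struct queue_ctx *q,
			       void *cic, void *grp)
{
	pthread_mutex_lock(&queue_lock);
	q->ref++;             /* reference taken while still holding the lock */
	rq->priv[0] = cic;    /* ...and the pointers published under it too */
	rq->priv[1] = q;
	rq->priv[2] = grp;
	pthread_mutex_unlock(&queue_lock);
}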
@@ -3788,15 +3888,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd) | |||
3788 | cfq_put_queue(cfqd->async_idle_cfqq); | 3888 | cfq_put_queue(cfqd->async_idle_cfqq); |
3789 | } | 3889 | } |
3790 | 3890 | ||
3791 | static void cfq_cfqd_free(struct rcu_head *head) | ||
3792 | { | ||
3793 | kfree(container_of(head, struct cfq_data, rcu)); | ||
3794 | } | ||
3795 | |||
3796 | static void cfq_exit_queue(struct elevator_queue *e) | 3891 | static void cfq_exit_queue(struct elevator_queue *e) |
3797 | { | 3892 | { |
3798 | struct cfq_data *cfqd = e->elevator_data; | 3893 | struct cfq_data *cfqd = e->elevator_data; |
3799 | struct request_queue *q = cfqd->queue; | 3894 | struct request_queue *q = cfqd->queue; |
3895 | bool wait = false; | ||
3800 | 3896 | ||
3801 | cfq_shutdown_timer_wq(cfqd); | 3897 | cfq_shutdown_timer_wq(cfqd); |
3802 | 3898 | ||
@@ -3815,7 +3911,13 @@ static void cfq_exit_queue(struct elevator_queue *e) | |||
3815 | 3911 | ||
3816 | cfq_put_async_queues(cfqd); | 3912 | cfq_put_async_queues(cfqd); |
3817 | cfq_release_cfq_groups(cfqd); | 3913 | cfq_release_cfq_groups(cfqd); |
3818 | cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg); | 3914 | |
3915 | /* | ||
3916 | * If there are groups that we could not unlink from the blkcg list, | ||
3917 | * wait for an RCU grace period before they are freed. | ||
3918 | */ | ||
3919 | if (cfqd->nr_blkcg_linked_grps) | ||
3920 | wait = true; | ||
3819 | 3921 | ||
3820 | spin_unlock_irq(q->queue_lock); | 3922 | spin_unlock_irq(q->queue_lock); |
3821 | 3923 | ||
@@ -3825,8 +3927,25 @@ static void cfq_exit_queue(struct elevator_queue *e) | |||
3825 | ida_remove(&cic_index_ida, cfqd->cic_index); | 3927 | ida_remove(&cic_index_ida, cfqd->cic_index); |
3826 | spin_unlock(&cic_index_lock); | 3928 | spin_unlock(&cic_index_lock); |
3827 | 3929 | ||
3828 | /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ | 3930 | /* |
3829 | call_rcu(&cfqd->rcu, cfq_cfqd_free); | 3931 | * Wait for cfqg->blkg->key accessors to exit their grace periods. |
3932 | * Do this wait only if there are other unlinked groups out | ||
3933 | * there. This can happen if the cgroup deletion path claimed the | ||
3934 | * responsibility of cleaning up a group before the queue cleanup code | ||
3935 | * got to the group. | ||
3936 | * | ||
3937 | * Do not call synchronize_rcu() unconditionally as there are drivers | ||
3938 | * which create/delete request queues hundreds of times during scan/boot | ||
3939 | * and synchronize_rcu() can take significant time and slow down boot. | ||
3940 | */ | ||
3941 | if (wait) | ||
3942 | synchronize_rcu(); | ||
3943 | |||
3944 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | ||
3945 | /* Free up per cpu stats for root group */ | ||
3946 | free_percpu(cfqd->root_group.blkg.stats_cpu); | ||
3947 | #endif | ||
3948 | kfree(cfqd); | ||
3830 | } | 3949 | } |
3831 | 3950 | ||
3832 | static int cfq_alloc_cic_index(void) | 3951 | static int cfq_alloc_cic_index(void) |
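The exit path now notes, while still under queue_lock, whether any groups were left linked on the blkcg side, and only in that case pays for a full RCU grace-period wait after dropping the lock. The same shape in a standalone sketch; expensive_grace_period_wait() is a stand-in for synchronize_rcu(), not a real API:

#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_linked_grps;	/* groups the cgroup side may still be touching */

static void expensive_grace_period_wait(void)
{
	/* stand-in for synchronize_rcu(); deliberately skipped when possible */
}

static void exit_queue_sketch(void)
{
	bool wait = false;

	pthread_mutex_lock(&queue_lock);
	/* ... put async queues, release groups ... */
	if (nr_linked_grps)
		wait = true;	/* readers may still hold RCU references */
	pthread_mutex_unlock(&queue_lock);

	if (wait)
		expensive_grace_period_wait();	/* only pay the cost when needed */

	/* free the per-queue data afterwards */
}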
@@ -3859,9 +3978,17 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3859 | return NULL; | 3978 | return NULL; |
3860 | 3979 | ||
3861 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); | 3980 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
3862 | if (!cfqd) | 3981 | if (!cfqd) { |
3982 | spin_lock(&cic_index_lock); | ||
3983 | ida_remove(&cic_index_ida, i); | ||
3984 | spin_unlock(&cic_index_lock); | ||
3863 | return NULL; | 3985 | return NULL; |
3986 | } | ||
3864 | 3987 | ||
3988 | /* | ||
3989 | * No need to take queue_lock in this routine, since we are | ||
3990 | * initializing the I/O scheduler and nobody else is using cfqd yet | ||
3991 | */ | ||
3865 | cfqd->cic_index = i; | 3992 | cfqd->cic_index = i; |
3866 | 3993 | ||
3867 | /* Init root service tree */ | 3994 | /* Init root service tree */ |
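The new failure branch returns the cic index obtained from the ida before bailing out, instead of leaking it when the cfq_data allocation fails. The unwind-in-reverse-order pattern in a standalone sketch, with a toy index pool in place of the ida:

#include <stdlib.h>

struct index_pool { unsigned long bitmap; };	/* toy stand-in for an ida */

static int pool_get(struct index_pool *p)
{
	for (int i = 0; i < 64; i++) {
		if (!(p->bitmap & (1UL << i))) {
			p->bitmap |= 1UL << i;
			return i;
		}
	}
	return -1;
}

static void pool_put(struct index_pool *p, int i)
{
	p->bitmap &= ~(1UL << i);
}

static void *init_sketch(struct index_pool *p)
{
	void *data;
	int idx = pool_get(p);

	if (idx < 0)
		return NULL;

	data = calloc(1, 128);
	if (!data) {
		pool_put(p, idx);	/* undo the earlier step before bailing out */
		return NULL;
	}
	/* ... record idx in data and continue initialization ... */
	return data;
}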
@@ -3878,14 +4005,29 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3878 | 4005 | ||
3879 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 4006 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3880 | /* | 4007 | /* |
3881 | * Take a reference to root group which we never drop. This is just | 4008 | * Set root group reference to 2. One reference will be dropped when |
3882 | * to make sure that cfq_put_cfqg() does not try to kfree root group | 4009 | * all groups on cfqd->cfqg_list are being deleted during queue exit. |
4010 | * The other reference will remain as we don't want to delete this | ||
4011 | * group: it is statically allocated and gets destroyed when the | ||
4012 | * cfq_data goes away. | ||
3883 | */ | 4013 | */ |
3884 | atomic_set(&cfqg->ref, 1); | 4014 | cfqg->ref = 2; |
4015 | |||
4016 | if (blkio_alloc_blkg_stats(&cfqg->blkg)) { | ||
4017 | kfree(cfqg); | ||
4018 | kfree(cfqd); | ||
4019 | return NULL; | ||
4020 | } | ||
4021 | |||
3885 | rcu_read_lock(); | 4022 | rcu_read_lock(); |
4023 | |||
3886 | cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, | 4024 | cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, |
3887 | (void *)cfqd, 0); | 4025 | (void *)cfqd, 0); |
3888 | rcu_read_unlock(); | 4026 | rcu_read_unlock(); |
4027 | cfqd->nr_blkcg_linked_grps++; | ||
4028 | |||
4029 | /* Add group to cfqd->cfqg_list */ | ||
4030 | hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); | ||
3889 | #endif | 4031 | #endif |
3890 | /* | 4032 | /* |
3891 | * Not strictly needed (since RB_ROOT just clears the node and we | 4033 | * Not strictly needed (since RB_ROOT just clears the node and we |
@@ -3901,7 +4043,7 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3901 | * will not attempt to free it. | 4043 | * will not attempt to free it. |
3902 | */ | 4044 | */ |
3903 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); | 4045 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); |
3904 | atomic_inc(&cfqd->oom_cfqq.ref); | 4046 | cfqd->oom_cfqq.ref++; |
3905 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); | 4047 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); |
3906 | 4048 | ||
3907 | INIT_LIST_HEAD(&cfqd->cic_list); | 4049 | INIT_LIST_HEAD(&cfqd->cic_list); |
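oom_cfqq.ref, like cfqq->ref and cfqg->ref elsewhere in this series, becomes a plain int because every increment and decrement already happens under queue_lock, so atomic operations only add cost. A minimal sketch of a lock-protected refcount with hypothetical names (the statically allocated root group additionally starts at 2, so the reference dropped at unlink time never frees it):

#include <pthread.h>
#include <stdlib.h>

struct group_sketch {
	int ref;                 /* plain int: only touched under the lock */
	pthread_mutex_t *lock;   /* plays the role of queue_lock */
};

static void group_get(struct group_sketch *g)
{
	pthread_mutex_lock(g->lock);
	g->ref++;
	pthread_mutex_unlock(g->lock);
}

static void group_put(struct group_sketch *g)
{
	int last;

	pthread_mutex_lock(g->lock);
	last = (--g->ref == 0);
	pthread_mutex_unlock(g->lock);
	if (last)
		free(g);	/* only for a dynamically allocated group */
}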
@@ -3925,7 +4067,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3925 | cfqd->cfq_slice_idle = cfq_slice_idle; | 4067 | cfqd->cfq_slice_idle = cfq_slice_idle; |
3926 | cfqd->cfq_group_idle = cfq_group_idle; | 4068 | cfqd->cfq_group_idle = cfq_group_idle; |
3927 | cfqd->cfq_latency = 1; | 4069 | cfqd->cfq_latency = 1; |
3928 | cfqd->cfq_group_isolation = 0; | ||
3929 | cfqd->hw_tag = -1; | 4070 | cfqd->hw_tag = -1; |
3930 | /* | 4071 | /* |
3931 | * we optimistically start assuming sync ops weren't delayed in last | 4072 | * we optimistically start assuming sync ops weren't delayed in last |
@@ -4001,7 +4142,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | |||
4001 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 4142 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
4002 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 4143 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
4003 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); | 4144 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); |
4004 | SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0); | ||
4005 | #undef SHOW_FUNCTION | 4145 | #undef SHOW_FUNCTION |
4006 | 4146 | ||
4007 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 4147 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
@@ -4035,7 +4175,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | |||
4035 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, | 4175 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, |
4036 | UINT_MAX, 0); | 4176 | UINT_MAX, 0); |
4037 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); | 4177 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); |
4038 | STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0); | ||
4039 | #undef STORE_FUNCTION | 4178 | #undef STORE_FUNCTION |
4040 | 4179 | ||
4041 | #define CFQ_ATTR(name) \ | 4180 | #define CFQ_ATTR(name) \ |
@@ -4053,7 +4192,6 @@ static struct elv_fs_entry cfq_attrs[] = { | |||
4053 | CFQ_ATTR(slice_idle), | 4192 | CFQ_ATTR(slice_idle), |
4054 | CFQ_ATTR(group_idle), | 4193 | CFQ_ATTR(group_idle), |
4055 | CFQ_ATTR(low_latency), | 4194 | CFQ_ATTR(low_latency), |
4056 | CFQ_ATTR(group_isolation), | ||
4057 | __ATTR_NULL | 4195 | __ATTR_NULL |
4058 | }; | 4196 | }; |
4059 | 4197 | ||
@@ -4068,7 +4206,6 @@ static struct elevator_type iosched_cfq = { | |||
4068 | .elevator_add_req_fn = cfq_insert_request, | 4206 | .elevator_add_req_fn = cfq_insert_request, |
4069 | .elevator_activate_req_fn = cfq_activate_request, | 4207 | .elevator_activate_req_fn = cfq_activate_request, |
4070 | .elevator_deactivate_req_fn = cfq_deactivate_request, | 4208 | .elevator_deactivate_req_fn = cfq_deactivate_request, |
4071 | .elevator_queue_empty_fn = cfq_queue_empty, | ||
4072 | .elevator_completed_req_fn = cfq_completed_request, | 4209 | .elevator_completed_req_fn = cfq_completed_request, |
4073 | .elevator_former_req_fn = elv_rb_former_request, | 4210 | .elevator_former_req_fn = elv_rb_former_request, |
4074 | .elevator_latter_req_fn = elv_rb_latter_request, | 4211 | .elevator_latter_req_fn = elv_rb_latter_request, |
@@ -4090,6 +4227,7 @@ static struct blkio_policy_type blkio_policy_cfq = { | |||
4090 | .blkio_unlink_group_fn = cfq_unlink_blkio_group, | 4227 | .blkio_unlink_group_fn = cfq_unlink_blkio_group, |
4091 | .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, | 4228 | .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, |
4092 | }, | 4229 | }, |
4230 | .plid = BLKIO_POLICY_PROP, | ||
4093 | }; | 4231 | }; |
4094 | #else | 4232 | #else |
4095 | static struct blkio_policy_type blkio_policy_cfq; | 4233 | static struct blkio_policy_type blkio_policy_cfq; |