 block/bfq-iosched.c | 881 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 840 insertions(+), 41 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index deb1f21c535f..6e7388a1d220 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -281,11 +281,12 @@ struct bfq_ttime {
  * struct bfq_queue - leaf schedulable entity.
  *
  * A bfq_queue is a leaf request queue; it can be associated with an
- * io_context or more, if it is async. @cgroup holds a reference to
- * the cgroup, to be sure that it does not disappear while a bfqq
- * still references it (mostly to avoid races between request issuing
- * and task migration followed by cgroup destruction). All the fields
- * are protected by the queue lock of the containing bfqd.
+ * io_context or more, if it is async or shared between cooperating
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
+ * does not disappear while a bfqq still references it (mostly to avoid
+ * races between request issuing and task migration followed by cgroup
+ * destruction).
+ * All the fields are protected by the queue lock of the containing bfqd.
  */
 struct bfq_queue {
         /* reference counter */
@@ -298,6 +299,16 @@ struct bfq_queue {
         /* next ioprio and ioprio class if a change is in progress */
         unsigned short new_ioprio, new_ioprio_class;
 
+        /*
+         * Shared bfq_queue if queue is cooperating with one or more
+         * other queues.
+         */
+        struct bfq_queue *new_bfqq;
+        /* request-position tree member (see bfq_group's @rq_pos_tree) */
+        struct rb_node pos_node;
+        /* request-position tree root (see bfq_group's @rq_pos_tree) */
+        struct rb_root *pos_root;
+
         /* sorted list of pending requests */
         struct rb_root sort_list;
         /* if fifo isn't expired, next request to serve */
@@ -347,6 +358,12 @@ struct bfq_queue {
         /* pid of the process owning the queue, used for logging purposes */
         pid_t pid;
 
+        /*
+         * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
+         * if the queue is shared.
+         */
+        struct bfq_io_cq *bic;
+
         /* current maximum weight-raising time for this queue */
         unsigned long wr_cur_max_time;
         /*
@@ -375,10 +392,13 @@ struct bfq_queue {
          * last transition from idle to backlogged.
          */
         unsigned long service_from_backlogged;
+
         /*
          * Value of wr start time when switching to soft rt
          */
         unsigned long wr_start_at_switch_to_srt;
+
+        unsigned long split_time; /* time of last split */
 };
 
 /**
@@ -394,6 +414,26 @@ struct bfq_io_cq {
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
         uint64_t blkcg_serial_nr; /* the current blkcg serial */
 #endif
+        /*
+         * Snapshot of the idle window before merging; taken to
+         * remember this value while the queue is merged, so as to be
+         * able to restore it in case of split.
+         */
+        bool saved_idle_window;
+        /*
+         * Same purpose as the previous two fields for the I/O bound
+         * classification of a queue.
+         */
+        bool saved_IO_bound;
+
+        /*
+         * Similar to previous fields: save wr information.
+         */
+        unsigned long saved_wr_coeff;
+        unsigned long saved_last_wr_start_finish;
+        unsigned long saved_wr_start_at_switch_to_srt;
+        unsigned int saved_wr_cur_max_time;
+        struct bfq_ttime saved_ttime;
 };
 
 enum bfq_device_speed {
@@ -584,6 +624,15 @@ struct bfq_data {
         struct bfq_io_cq *bio_bic;
         /* bfqq associated with the task issuing current bio for merging */
         struct bfq_queue *bio_bfqq;
+
+        /*
+         * io context to put right after bfqd->lock is released. This
+         * field is used to perform put_io_context, when needed, only
+         * after the scheduler lock has been released, and thus to
+         * prevent an ioc->lock from possibly being taken while the
+         * scheduler lock is being held.
+         */
+        struct io_context *ioc_to_put;
 };
 
 enum bfqq_state_flags {
589 | enum bfqq_state_flags { | 638 | enum bfqq_state_flags { |
@@ -605,6 +654,8 @@ enum bfqq_state_flags { | |||
605 | * may need softrt-next-start | 654 | * may need softrt-next-start |
606 | * update | 655 | * update |
607 | */ | 656 | */ |
657 | BFQQF_coop, /* bfqq is shared */ | ||
658 | BFQQF_split_coop /* shared bfqq will be split */ | ||
608 | }; | 659 | }; |
609 | 660 | ||
610 | #define BFQ_BFQQ_FNS(name) \ | 661 | #define BFQ_BFQQ_FNS(name) \ |
@@ -628,6 +679,8 @@ BFQ_BFQQ_FNS(fifo_expire);
 BFQ_BFQQ_FNS(idle_window);
 BFQ_BFQQ_FNS(sync);
 BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(coop);
+BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
 #undef BFQ_BFQQ_FNS
 
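For readers tracking the new flags: BFQ_BFQQ_FNS() is the flag-accessor generator used throughout this file, so the two new invocations above produce the mark/clear/test helpers used later in this patch (bfq_mark_bfqq_coop(), bfq_bfqq_split_coop(), and so on). As a rough sketch, assuming the conventional bit-op body this macro family uses on the queue's flags word (the macro body itself is elided from this diff), BFQ_BFQQ_FNS(coop) expands to something like:

    static void bfq_mark_bfqq_coop(struct bfq_queue *bfqq)
    {
            __set_bit(BFQQF_coop, &bfqq->flags);
    }

    static void bfq_clear_bfqq_coop(struct bfq_queue *bfqq)
    {
            __clear_bit(BFQQF_coop, &bfqq->flags);
    }

    static int bfq_bfqq_coop(struct bfq_queue *bfqq)
    {
            return test_bit(BFQQF_coop, &bfqq->flags);
    }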
@@ -738,6 +791,9 @@ struct bfq_group_data {
  * to avoid too many special cases during group creation/
  * migration.
  * @stats: stats for this bfqg.
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ *               determining if two or more queues have interleaving
+ *               requests (see bfq_find_close_cooperator()).
  *
  * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
  * there is a set of bfq_groups, each one collecting the lower-level
@@ -762,6 +818,8 @@ struct bfq_group {
 
         struct bfq_entity *my_entity;
 
+        struct rb_root rq_pos_tree;
+
         struct bfqg_stats stats;
 };
 
@@ -811,6 +869,27 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
         return bic->icq.q->elevator->elevator_data;
 }
 
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+        struct bfq_entity *group_entity = bfqq->entity.parent;
+
+        if (!group_entity)
+                group_entity = &bfqq->bfqd->root_group->entity;
+
+        return container_of(group_entity, struct bfq_group, entity);
+}
+
+#else
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+        return bfqq->bfqd->root_group;
+}
+
+#endif
+
 static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
 static void bfq_put_queue(struct bfq_queue *bfqq);
 static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
@@ -975,6 +1054,34 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd)
         }
 }
 
+/*
+ * The next two functions release bfqd->lock and put the io context
+ * pointed to by bfqd->ioc_to_put. This delayed put is used so as not
+ * to risk taking an ioc->lock while the scheduler lock is being held.
+ */
+static void bfq_unlock_put_ioc(struct bfq_data *bfqd)
+{
+        struct io_context *ioc_to_put = bfqd->ioc_to_put;
+
+        bfqd->ioc_to_put = NULL;
+        spin_unlock_irq(&bfqd->lock);
+
+        if (ioc_to_put)
+                put_io_context(ioc_to_put);
+}
+
+static void bfq_unlock_put_ioc_restore(struct bfq_data *bfqd,
+                                       unsigned long flags)
+{
+        struct io_context *ioc_to_put = bfqd->ioc_to_put;
+
+        bfqd->ioc_to_put = NULL;
+        spin_unlock_irqrestore(&bfqd->lock, flags);
+
+        if (ioc_to_put)
+                put_io_context(ioc_to_put);
+}
+
 /**
  * bfq_gt - compare two timestamps.
  * @a: first ts.
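The two helpers just added encode a lock-ordering rule: put_io_context() may end up taking ioc->lock, which must never happen while bfqd->lock is held, so the put is parked in bfqd->ioc_to_put and performed only once the scheduler lock has been dropped. A minimal user-space sketch of the same deferred-put pattern (pthread-based; all names here are illustrative, not kernel API):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
            pthread_mutex_t lock;   /* analogue of ioc->lock */
            int refcount;
    };

    struct sched {
            pthread_mutex_t lock;   /* analogue of bfqd->lock */
            struct obj *obj_to_put; /* release deferred to unlock time */
    };

    static void obj_put(struct obj *o)
    {
            /* taking o->lock here would deadlock under sched->lock */
            pthread_mutex_lock(&o->lock);
            int last = --o->refcount == 0;
            pthread_mutex_unlock(&o->lock);
            if (last)
                    free(o);
    }

    static void sched_unlock_put(struct sched *s)
    {
            struct obj *o = s->obj_to_put;

            s->obj_to_put = NULL;           /* detach while still locked */
            pthread_mutex_unlock(&s->lock);
            if (o)
                    obj_put(o);             /* safe: scheduler lock dropped */
    }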
@@ -2425,7 +2532,14 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
         struct bfq_entity *entity = in_serv_entity;
 
         if (bfqd->in_service_bic) {
-                put_io_context(bfqd->in_service_bic->icq.ioc);
+                /*
+                 * Schedule the release of a reference to
+                 * bfqd->in_service_bic->icq.ioc to right after the
+                 * scheduler lock is released. This ioc is not
+                 * released immediately, so as not to risk taking
+                 * an ioc->lock while holding the scheduler lock.
+                 */
+                bfqd->ioc_to_put = bfqd->in_service_bic->icq.ioc;
                 bfqd->in_service_bic = NULL;
         }
 
@@ -2914,6 +3028,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
          * in bfq_init_queue()
          */
         bfqg->bfqd = bfqd;
+        bfqg->rq_pos_tree = RB_ROOT;
 }
 
 static void bfq_pd_free(struct blkg_policy_data *pd)
@@ -2982,6 +3097,8 @@ static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
         return bfqg;
 }
 
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
+                                  struct bfq_queue *bfqq);
 static void bfq_bfqq_expire(struct bfq_data *bfqd,
                             struct bfq_queue *bfqq,
                             bool compensate,
@@ -3030,8 +3147,10 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
         entity->sched_data = &bfqg->sched_data;
         bfqg_get(bfqg);
 
-        if (bfq_bfqq_busy(bfqq))
+        if (bfq_bfqq_busy(bfqq)) {
+                bfq_pos_tree_add_move(bfqd, bfqq);
                 bfq_activate_bfqq(bfqd, bfqq);
+        }
 
         if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
                 bfq_schedule_dispatch(bfqd);
@@ -3071,8 +3190,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
                 bic_set_bfqq(bic, NULL, 0);
                 bfq_log_bfqq(bfqd, async_bfqq,
                              "bic_change_group: %p %d",
-                             async_bfqq,
-                             async_bfqq->ref);
+                             async_bfqq, async_bfqq->ref);
                 bfq_put_queue(async_bfqq);
         }
 }
@@ -3214,7 +3332,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
         __bfq_deactivate_entity(entity, false);
         bfq_put_async_queues(bfqd, bfqg);
 
-        spin_unlock_irqrestore(&bfqd->lock, flags);
+        bfq_unlock_put_ioc_restore(bfqd, flags);
         /*
          * @blkg is going offline and will be ignored by
          * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
@@ -3731,6 +3849,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
         }
 }
 
+static struct bfq_queue *
+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
+                       sector_t sector, struct rb_node **ret_parent,
+                       struct rb_node ***rb_link)
+{
+        struct rb_node **p, *parent;
+        struct bfq_queue *bfqq = NULL;
+
+        parent = NULL;
+        p = &root->rb_node;
+        while (*p) {
+                struct rb_node **n;
+
+                parent = *p;
+                bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+
+                /*
+                 * Sort strictly based on sector. Smallest to the left,
+                 * largest to the right.
+                 */
+                if (sector > blk_rq_pos(bfqq->next_rq))
+                        n = &(*p)->rb_right;
+                else if (sector < blk_rq_pos(bfqq->next_rq))
+                        n = &(*p)->rb_left;
+                else
+                        break;
+                p = n;
+                bfqq = NULL;
+        }
+
+        *ret_parent = parent;
+        if (rb_link)
+                *rb_link = p;
+
+        bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
+                (unsigned long long)sector,
+                bfqq ? bfqq->pid : 0);
+
+        return bfqq;
+}
+
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+        struct rb_node **p, *parent;
+        struct bfq_queue *__bfqq;
+
+        if (bfqq->pos_root) {
+                rb_erase(&bfqq->pos_node, bfqq->pos_root);
+                bfqq->pos_root = NULL;
+        }
+
+        if (bfq_class_idle(bfqq))
+                return;
+        if (!bfqq->next_rq)
+                return;
+
+        bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+        __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
+                        blk_rq_pos(bfqq->next_rq), &parent, &p);
+        if (!__bfqq) {
+                rb_link_node(&bfqq->pos_node, parent, p);
+                rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
+        } else
+                bfqq->pos_root = NULL;
+}
+
 /*
  * Return expired entry, or NULL to just start from scratch in rbtree.
  */
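bfq_rq_pos_tree_lookup() returns an exact match when one exists and otherwise reports, through *ret_parent, the node where the descent ended, which is by construction the in-tree key nearest to the probed sector. A self-contained user-space sketch of the resulting "closest position within a threshold" query (illustrative only: a sorted array stands in for the rbtree, and CLOSE_THR for BFQQ_CLOSE_THR):

    #include <stdlib.h>

    #define CLOSE_THR 8192LL        /* illustrative threshold, in sectors */

    /* Return the index of the entry closest to 'sector', or -1 if no
     * entry lies within CLOSE_THR; mirrors the tree-lookup logic above. */
    static int find_close(const long long *pos, int n, long long sector)
    {
            int lo = 0, hi = n - 1, best = -1;

            while (lo <= hi) {
                    int mid = lo + (hi - lo) / 2;

                    if (pos[mid] == sector)
                            return mid;     /* exact hit, like the rb lookup */
                    if (pos[mid] < sector)
                            lo = mid + 1;
                    else
                            hi = mid - 1;
            }
            /* Now pos[hi] < sector < pos[lo]: only the two neighbours can
             * be close, just as bfqq_find_close() below checks the parent
             * of the NULL leaf and then its rb_next()/rb_prev() node. */
            if (lo < n && llabs(pos[lo] - sector) <= CLOSE_THR)
                    best = lo;
            if (hi >= 0 && llabs(pos[hi] - sector) <= CLOSE_THR &&
                (best < 0 || llabs(pos[hi] - sector) < llabs(pos[best] - sector)))
                    best = hi;
            return best;
    }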
@@ -3837,6 +4021,43 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
         }
 }
 
+static void
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+{
+        if (bic->saved_idle_window)
+                bfq_mark_bfqq_idle_window(bfqq);
+        else
+                bfq_clear_bfqq_idle_window(bfqq);
+
+        if (bic->saved_IO_bound)
+                bfq_mark_bfqq_IO_bound(bfqq);
+        else
+                bfq_clear_bfqq_IO_bound(bfqq);
+
+        bfqq->ttime = bic->saved_ttime;
+        bfqq->wr_coeff = bic->saved_wr_coeff;
+        bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
+        bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
+        bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
+
+        if (bfqq->wr_coeff > 1 &&
+            time_is_before_jiffies(bfqq->last_wr_start_finish +
+                                   bfqq->wr_cur_max_time)) {
+                bfq_log_bfqq(bfqq->bfqd, bfqq,
+                             "resume state: switching off wr");
+
+                bfqq->wr_coeff = 1;
+        }
+
+        /* make sure weight will be updated, however we got here */
+        bfqq->entity.prio_changed = 1;
+}
+
+static int bfqq_process_refs(struct bfq_queue *bfqq)
+{
+        return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
+}
+
 static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
 {
         struct bfq_entity *entity = &bfqq->entity;
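bfqq_process_refs() deserves a note: of all references held on a bfq_queue, those taken by in-flight requests (allocated) and by the entity's presence on a service tree (entity.on_st) are not process references, so they are subtracted from the total. A toy restatement with a worked example (types and numbers are illustrative):

    struct toy_bfqq {
            int ref;        /* total reference count */
            int allocated;  /* references held by in-flight requests */
            int on_st;      /* 1 while the entity sits on a service tree */
    };

    static int toy_process_refs(const struct toy_bfqq *q)
    {
            return q->ref - q->allocated - q->on_st;
    }

    /*
     * Example: ref = 5, allocated = 3, on_st = 1 leaves 1 process
     * reference, i.e. exactly one process still owns the queue; this is
     * the case bfq_split_bfqq() later treats as "last man standing" and
     * recycles the queue instead of detaching from it.
     */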
@@ -4157,14 +4378,16 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
         /*
          * bfqq deserves to be weight-raised if:
          * - it is sync,
-         * - it has been idle for enough time or is soft real-time.
+         * - it has been idle for enough time or is soft real-time,
+         * - it is linked to a bfq_io_cq (it is not shared in any sense).
          */
         soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
                 time_is_before_jiffies(bfqq->soft_rt_next_start);
         *interactive = idle_for_long_time;
         wr_or_deserves_wr = bfqd->low_latency &&
                 (bfqq->wr_coeff > 1 ||
-                 (bfq_bfqq_sync(bfqq) && (*interactive || soft_rt)));
+                 (bfq_bfqq_sync(bfqq) &&
+                  bfqq->bic && (*interactive || soft_rt)));
 
         /*
          * Using the last flag, update budget and check whether bfqq
@@ -4186,14 +4409,22 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
         }
 
         if (bfqd->low_latency) {
-                bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
-                                                 old_wr_coeff,
-                                                 wr_or_deserves_wr,
-                                                 *interactive,
-                                                 soft_rt);
-
-                if (old_wr_coeff != bfqq->wr_coeff)
-                        bfqq->entity.prio_changed = 1;
+                if (unlikely(time_is_after_jiffies(bfqq->split_time)))
+                        /* wraparound */
+                        bfqq->split_time =
+                                jiffies - bfqd->bfq_wr_min_idle_time - 1;
+
+                if (time_is_before_jiffies(bfqq->split_time +
+                                           bfqd->bfq_wr_min_idle_time)) {
+                        bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
+                                                         old_wr_coeff,
+                                                         wr_or_deserves_wr,
+                                                         *interactive,
+                                                         soft_rt);
+
+                        if (old_wr_coeff != bfqq->wr_coeff)
+                                bfqq->entity.prio_changed = 1;
+                }
         }
 
         bfqq->last_idle_bklogged = jiffies;
@@ -4240,6 +4471,12 @@ static void bfq_add_request(struct request *rq)
         next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
         bfqq->next_rq = next_rq;
 
+        /*
+         * Adjust priority tree position, if next_rq changes.
+         */
+        if (prev != bfqq->next_rq)
+                bfq_pos_tree_add_move(bfqd, bfqq);
+
         if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
                 bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
                                                  rq, &interactive);
@@ -4368,6 +4605,14 @@ static void bfq_remove_request(struct request_queue *q,
                  */
                 bfqq->entity.budget = bfqq->entity.service = 0;
         }
+
+        /*
+         * Remove queue from request-position tree as it is empty.
+         */
+        if (bfqq->pos_root) {
+                rb_erase(&bfqq->pos_node, bfqq->pos_root);
+                bfqq->pos_root = NULL;
+        }
 }
 
         if (rq->cmd_flags & REQ_META)
@@ -4445,11 +4690,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
                                          bfqd->last_position);
                 bfqq->next_rq = next_rq;
                 /*
-                 * If next_rq changes, update the queue's budget to fit
-                 * the new request.
+                 * If next_rq changes, update both the queue's budget to
+                 * fit the new request and the queue's position in its
+                 * rq_pos_tree.
                  */
-                if (prev != bfqq->next_rq)
+                if (prev != bfqq->next_rq) {
                         bfq_updated_next_req(bfqd, bfqq);
+                        bfq_pos_tree_add_move(bfqd, bfqq);
+                }
         }
 }
 
@@ -4532,12 +4780,364 @@ static void bfq_end_wr(struct bfq_data *bfqd)
         spin_unlock_irq(&bfqd->lock);
 }
 
+static sector_t bfq_io_struct_pos(void *io_struct, bool request)
+{
+        if (request)
+                return blk_rq_pos(io_struct);
+        else
+                return ((struct bio *)io_struct)->bi_iter.bi_sector;
+}
+
+static int bfq_rq_close_to_sector(void *io_struct, bool request,
+                                  sector_t sector)
+{
+        return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
+               BFQQ_CLOSE_THR;
+}
+
+static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+                                         struct bfq_queue *bfqq,
+                                         sector_t sector)
+{
+        struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+        struct rb_node *parent, *node;
+        struct bfq_queue *__bfqq;
+
+        if (RB_EMPTY_ROOT(root))
+                return NULL;
+
+        /*
+         * First, if we find a request starting at the end of the last
+         * request, choose it.
+         */
+        __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
+        if (__bfqq)
+                return __bfqq;
+
+        /*
+         * If the exact sector wasn't found, the parent of the NULL leaf
+         * will contain the closest sector (rq_pos_tree sorted by
+         * next_request position).
+         */
+        __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+        if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+                return __bfqq;
+
+        if (blk_rq_pos(__bfqq->next_rq) < sector)
+                node = rb_next(&__bfqq->pos_node);
+        else
+                node = rb_prev(&__bfqq->pos_node);
+        if (!node)
+                return NULL;
+
+        __bfqq = rb_entry(node, struct bfq_queue, pos_node);
+        if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+                return __bfqq;
+
+        return NULL;
+}
+
+static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
+                                                   struct bfq_queue *cur_bfqq,
+                                                   sector_t sector)
+{
+        struct bfq_queue *bfqq;
+
+        /*
+         * We shall notice if some of the queues are cooperating,
+         * e.g., working closely on the same area of the device. In
+         * that case, we can group them together and: 1) don't waste
+         * time idling, and 2) serve the union of their requests in
+         * the best possible order for throughput.
+         */
+        bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
+        if (!bfqq || bfqq == cur_bfqq)
+                return NULL;
+
+        return bfqq;
+}
+
+static struct bfq_queue *
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+        int process_refs, new_process_refs;
+        struct bfq_queue *__bfqq;
+
+        /*
+         * If there are no process references on the new_bfqq, then it is
+         * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+         * may have dropped their last reference (not just their last process
+         * reference).
+         */
+        if (!bfqq_process_refs(new_bfqq))
+                return NULL;
+
+        /* Avoid a circular list and skip interim queue merges. */
+        while ((__bfqq = new_bfqq->new_bfqq)) {
+                if (__bfqq == bfqq)
+                        return NULL;
+                new_bfqq = __bfqq;
+        }
+
+        process_refs = bfqq_process_refs(bfqq);
+        new_process_refs = bfqq_process_refs(new_bfqq);
+        /*
+         * If the process for the bfqq has gone away, there is no
+         * sense in merging the queues.
+         */
+        if (process_refs == 0 || new_process_refs == 0)
+                return NULL;
+
+        bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+                     new_bfqq->pid);
+
+        /*
+         * Merging is just a redirection: the requests of the process
+         * owning one of the two queues are redirected to the other queue.
+         * The latter queue, in its turn, is set as shared if this is the
+         * first time that the requests of some process are redirected to
+         * it.
+         *
+         * We redirect bfqq to new_bfqq and not the opposite, because we
+         * are in the context of the process owning bfqq, hence we have
+         * the io_cq of this process. So we can immediately configure this
+         * io_cq to redirect the requests of the process to new_bfqq.
+         *
+         * NOTE, even if new_bfqq coincides with the in-service queue, the
+         * io_cq of new_bfqq is not available, because, if the in-service
+         * queue is shared, bfqd->in_service_bic may not point to the
+         * io_cq of the in-service queue.
+         * Redirecting the requests of the process owning bfqq to the
+         * currently in-service queue is in any case the best option, as
+         * we feed the in-service queue with new requests close to the
+         * last request served and, by doing so, hopefully increase the
+         * throughput.
+         */
+        bfqq->new_bfqq = new_bfqq;
+        new_bfqq->ref += process_refs;
+        return new_bfqq;
+}
+
+static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+                                        struct bfq_queue *new_bfqq)
+{
+        if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
+            (bfqq->ioprio_class != new_bfqq->ioprio_class))
+                return false;
+
+        /*
+         * If either of the queues has already been detected as seeky,
+         * then merging it with the other queue is unlikely to lead to
+         * sequential I/O.
+         */
+        if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
+                return false;
+
+        /*
+         * Interleaved I/O is known to be done by (some) applications
+         * only for reads, so it does not make sense to merge async
+         * queues.
+         */
+        if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
+                return false;
+
+        return true;
+}
+
+/*
+ * If this function returns true, then bfqq cannot be merged. The idea
+ * is that true cooperation happens very early after processes start
+ * to do I/O. Usually, late cooperations are just accidental false
+ * positives. In case bfqq is weight-raised, such false positives
+ * would evidently degrade latency guarantees for bfqq.
+ */
+static bool wr_from_too_long(struct bfq_queue *bfqq)
+{
+        return bfqq->wr_coeff > 1 &&
+               time_is_before_jiffies(bfqq->last_wr_start_finish +
+                                      msecs_to_jiffies(100));
+}
+
+/*
+ * Attempt to schedule a merge of bfqq with the currently in-service
+ * queue or with a close queue among the scheduled queues. Return
+ * NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate in cooperation: in fact, since
+ * the requests temporarily redirected to the OOM queue could be redirected
+ * again to dedicated queues at any time, the state needed to correctly
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in such a critical condition as an out of memory,
+ * the benefits of queue merging may be of little relevance, or even
+ * negligible.
+ *
+ * Weight-raised queues can be merged only if their weight-raising
+ * period has just started. In fact cooperating processes are usually
+ * started together. Thus, with this filter we avoid false positives
+ * that would jeopardize low-latency guarantees.
+ *
+ * WARNING: queue merging may impair fairness among non-weight-raised
+ * queues, for at least two reasons: 1) the original weight of a
+ * merged queue may change during the merged state, 2) even if the
+ * weight stays the same, a merged queue may be bloated with many more
+ * requests than the ones produced by its originally-associated
+ * process.
+ */
+static struct bfq_queue *
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                     void *io_struct, bool request)
+{
+        struct bfq_queue *in_service_bfqq, *new_bfqq;
+
+        if (bfqq->new_bfqq)
+                return bfqq->new_bfqq;
+
+        if (!io_struct ||
+            wr_from_too_long(bfqq) ||
+            unlikely(bfqq == &bfqd->oom_bfqq))
+                return NULL;
+
+        /* If there is only one backlogged queue, don't search. */
+        if (bfqd->busy_queues == 1)
+                return NULL;
+
+        in_service_bfqq = bfqd->in_service_queue;
+
+        if (!in_service_bfqq || in_service_bfqq == bfqq ||
+            !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) ||
+            unlikely(in_service_bfqq == &bfqd->oom_bfqq))
+                goto check_scheduled;
+
+        if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
+            bfqq->entity.parent == in_service_bfqq->entity.parent &&
+            bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
+                new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
+                if (new_bfqq)
+                        return new_bfqq;
+        }
+        /*
+         * Check whether there is a cooperator among currently scheduled
+         * queues. The only thing we need is that the bio/request is not
+         * NULL, as we need it to establish whether a cooperator exists.
+         */
+check_scheduled:
+        new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
+                        bfq_io_struct_pos(io_struct, request));
+
+        if (new_bfqq && !wr_from_too_long(new_bfqq) &&
+            likely(new_bfqq != &bfqd->oom_bfqq) &&
+            bfq_may_be_close_cooperator(bfqq, new_bfqq))
+                return bfq_setup_merge(bfqq, new_bfqq);
+
+        return NULL;
+}
+
+static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+{
+        struct bfq_io_cq *bic = bfqq->bic;
+
+        /*
+         * If !bfqq->bic, the queue is already shared or its requests
+         * have already been redirected to a shared queue; both idle window
+         * and weight raising state have already been saved. Do nothing.
+         */
+        if (!bic)
+                return;
+
+        bic->saved_ttime = bfqq->ttime;
+        bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+        bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+        bic->saved_wr_coeff = bfqq->wr_coeff;
+        bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
+        bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
+        bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+}
+
+static void bfq_get_bic_reference(struct bfq_queue *bfqq)
+{
+        /*
+         * If bfqq->bic has a non-NULL value, the bic to which it belongs
+         * is about to begin using a shared bfq_queue.
+         */
+        if (bfqq->bic)
+                atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
+}
+
+static void
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+                struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+        bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+                     (unsigned long)new_bfqq->pid);
+        /* Save weight raising and idle window of the merged queues */
+        bfq_bfqq_save_state(bfqq);
+        bfq_bfqq_save_state(new_bfqq);
+        if (bfq_bfqq_IO_bound(bfqq))
+                bfq_mark_bfqq_IO_bound(new_bfqq);
+        bfq_clear_bfqq_IO_bound(bfqq);
+
+        /*
+         * If bfqq is weight-raised, then let new_bfqq inherit
+         * weight-raising. To reduce false positives, neglect the case
+         * where bfqq has just been created, but has not yet made it
+         * to be weight-raised (which may happen because EQM may merge
+         * bfqq even before bfq_add_request is executed for the first
+         * time for bfqq).
+         */
+        if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
+                new_bfqq->wr_coeff = bfqq->wr_coeff;
+                new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
+                new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
+                new_bfqq->wr_start_at_switch_to_srt =
+                        bfqq->wr_start_at_switch_to_srt;
+                if (bfq_bfqq_busy(new_bfqq))
+                        bfqd->wr_busy_queues++;
+                new_bfqq->entity.prio_changed = 1;
+        }
+
+        if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
+                bfqq->wr_coeff = 1;
+                bfqq->entity.prio_changed = 1;
+                if (bfq_bfqq_busy(bfqq))
+                        bfqd->wr_busy_queues--;
+        }
+
+        bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d",
+                     bfqd->wr_busy_queues);
+
+        /*
+         * Grab a reference to the bic, to prevent it from being destroyed
+         * before being possibly touched by a bfq_split_bfqq().
+         */
+        bfq_get_bic_reference(bfqq);
+        bfq_get_bic_reference(new_bfqq);
+        /*
+         * Merge queues (that is, let bic redirect its requests to new_bfqq)
+         */
+        bic_set_bfqq(bic, new_bfqq, 1);
+        bfq_mark_bfqq_coop(new_bfqq);
+        /*
+         * new_bfqq now belongs to at least two bics (it is a shared queue):
+         * set new_bfqq->bic to NULL. bfqq either:
+         * - does not belong to any bic any more, and hence bfqq->bic must
+         *   be set to NULL, or
+         * - is a queue whose owning bics have already been redirected to a
+         *   different queue, hence the queue is destined to not belong to
+         *   any bic soon and bfqq->bic is already NULL (therefore the next
+         *   assignment causes no harm).
+         */
+        new_bfqq->bic = NULL;
+        bfqq->bic = NULL;
+        /* release process reference to bfqq */
+        bfq_put_queue(bfqq);
+}
+
 static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
                                 struct bio *bio)
 {
         struct bfq_data *bfqd = q->elevator->elevator_data;
         bool is_sync = op_is_sync(bio->bi_opf);
-        struct bfq_queue *bfqq = bfqd->bio_bfqq;
+        struct bfq_queue *bfqq = bfqd->bio_bfqq, *new_bfqq;
 
         /*
          * Disallow merge of a sync bio into an async request.
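The ->new_bfqq chain that bfq_setup_merge() manages is easiest to see in isolation. This toy model (user-space, illustrative names) shows the two invariants enforced above: a merge is always redirected to the tail of an existing chain, and a chain is never allowed to close into a cycle:

    struct toy_q {
            struct toy_q *new_q;    /* non-NULL once this queue was merged away */
    };

    /* Return the final merge target for 'q' merging into 'target',
     * or NULL if following the chain would loop back to 'q'. */
    static struct toy_q *chain_tail(struct toy_q *q, struct toy_q *target)
    {
            struct toy_q *t;

            while ((t = target->new_q)) {
                    if (t == q)
                            return NULL;    /* circular chain: refuse the merge */
                    target = t;
            }
            return target;
    }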
@@ -4552,6 +5152,37 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
         if (!bfqq)
                 return false;
 
+        /*
+         * We take advantage of this function to perform an early merge
+         * of the queues of possible cooperating processes.
+         */
+        new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
+        if (new_bfqq) {
+                /*
+                 * bic still points to bfqq, i.e., it has not yet been
+                 * redirected to some other bfq_queue, so a queue
+                 * merge between bfqq and new_bfqq can be safely
+                 * fulfilled, i.e., bic can be redirected to new_bfqq
+                 * and bfqq can be put.
+                 */
+                bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
+                                new_bfqq);
+                /*
+                 * If we get here, bio will be queued into new_bfqq,
+                 * so use new_bfqq to decide whether bio and rq can be
+                 * merged.
+                 */
+                bfqq = new_bfqq;
+
+                /*
+                 * Change also bfqd->bio_bfqq, as
+                 * bfqd->bio_bic now points to new_bfqq, and
+                 * this function may be invoked again (and then may
+                 * use again bfqd->bio_bfqq).
+                 */
+                bfqd->bio_bfqq = bfqq;
+        }
+
         return bfqq == RQ_BFQQ(rq);
 }
 
@@ -4959,6 +5590,15 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
 
 static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
+        /*
+         * If this bfqq is shared between multiple processes, check
+         * to make sure that those processes are still issuing I/Os
+         * within the mean seek distance. If not, it may be time to
+         * break the queues apart again.
+         */
+        if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
+                bfq_mark_bfqq_split_coop(bfqq);
+
         if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
                 if (bfqq->dispatched == 0)
                         /*
@@ -4970,8 +5610,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
                 bfqq->budget_timeout = jiffies;
 
                 bfq_del_bfqq_busy(bfqd, bfqq, true);
-        } else
+        } else {
                 bfq_requeue_bfqq(bfqd, bfqq);
+                /*
+                 * Resort priority tree of potential close cooperators.
+                 */
+                bfq_pos_tree_add_move(bfqd, bfqq);
+        }
 
         /*
          * All in-service entities must have been properly deactivated
@@ -5792,8 +6437,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
                 /*
                  * If too much time has elapsed from the beginning of
-                 * this weight-raising period, then end weight
-                 * raising.
+                 * this weight-raising period, then end weight raising.
                  */
                 if (time_is_before_jiffies(bfqq->last_wr_start_finish +
                                            bfqq->wr_cur_max_time)) {
@@ -5969,8 +6613,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
         struct request *rq;
 
         spin_lock_irq(&bfqd->lock);
+
         rq = __bfq_dispatch_request(hctx);
-        spin_unlock_irq(&bfqd->lock);
+        bfq_unlock_put_ioc(bfqd);
 
         return rq;
 }
@@ -6004,6 +6649,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
 #endif
 }
 
+static void bfq_put_cooperator(struct bfq_queue *bfqq)
+{
+        struct bfq_queue *__bfqq, *next;
+
+        /*
+         * If this queue was scheduled to merge with another queue, be
+         * sure to drop the reference taken on that queue (and others in
+         * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
+         */
+        __bfqq = bfqq->new_bfqq;
+        while (__bfqq) {
+                if (__bfqq == bfqq)
+                        break;
+                next = __bfqq->new_bfqq;
+                bfq_put_queue(__bfqq);
+                __bfqq = next;
+        }
+}
+
 static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
         if (bfqq == bfqd->in_service_queue) {
@@ -6013,6 +6677,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
         bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
 
+        bfq_put_cooperator(bfqq);
+
         bfq_put_queue(bfqq); /* release process reference */
 }
 
@@ -6028,9 +6694,20 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
                 unsigned long flags;
 
                 spin_lock_irqsave(&bfqd->lock, flags);
+                /*
+                 * If the bic is using a shared queue, put the
+                 * reference taken on the io_context when the bic
+                 * started using a shared bfq_queue. This put cannot
+                 * make ioc->ref_count reach 0, hence no ioc->lock
+                 * risks being taken (leading to possible deadlock
+                 * scenarios).
+                 */
+                if (is_sync && bfq_bfqq_coop(bfqq))
+                        put_io_context(bic->icq.ioc);
+
                 bfq_exit_bfqq(bfqd, bfqq);
                 bic_set_bfqq(bic, NULL, is_sync);
-                spin_unlock_irq(&bfqd->lock);
+                bfq_unlock_put_ioc_restore(bfqd, flags);
         }
 }
 
@@ -6152,8 +6829,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
         bfqq->budget_timeout = bfq_smallest_from_now();
 
         bfqq->wr_coeff = 1;
-        bfqq->last_wr_start_finish = bfq_smallest_from_now();
+        bfqq->last_wr_start_finish = jiffies;
         bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now();
+        bfqq->split_time = bfq_smallest_from_now();
 
         /*
          * Set to the value for which bfqq will not be deemed as
@@ -6288,6 +6966,11 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
         if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
                 return;
 
+        /* Idle window just restored, statistics are meaningless. */
+        if (time_is_after_eq_jiffies(bfqq->split_time +
+                                     bfqd->bfq_wr_min_idle_time))
+                return;
+
         enable_idle = bfq_bfqq_idle_window(bfqq);
 
         if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
@@ -6383,7 +7066,38 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 
 static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 {
-        struct bfq_queue *bfqq = RQ_BFQQ(rq);
+        struct bfq_queue *bfqq = RQ_BFQQ(rq),
+                *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+
+        if (new_bfqq) {
+                if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
+                        new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
+                /*
+                 * Release the request's reference to the old bfqq
+                 * and make sure one is taken to the shared queue.
+                 */
+                new_bfqq->allocated++;
+                bfqq->allocated--;
+                new_bfqq->ref++;
+                /*
+                 * If the bic associated with the process
+                 * issuing this request still points to bfqq
+                 * (and thus has not been already redirected
+                 * to new_bfqq or even some other bfq_queue),
+                 * then complete the merge and redirect it to
+                 * new_bfqq.
+                 */
+                if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+                        bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+                                        bfqq, new_bfqq);
+                /*
+                 * rq is about to be enqueued into new_bfqq,
+                 * release rq reference on bfqq
+                 */
+                bfq_put_queue(bfqq);
+                rq->elv.priv[1] = new_bfqq;
+                bfqq = new_bfqq;
+        }
 
         bfq_add_request(rq);
 
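The bookkeeping in the hunk above is easy to misread, so here it is restated as a toy fragment (illustrative, not kernel code): the request stops being charged to the old queue and starts being charged to the shared one, and the reference the request held on the old queue effectively moves with it (the matching drop on the old queue happens through bfq_put_queue() in the patch):

    struct toy_bfqq {
            int ref;        /* total references on the queue */
            int allocated;  /* requests currently charged to the queue */
    };

    static void redirect_rq(struct toy_bfqq *old, struct toy_bfqq *shared)
    {
            shared->allocated++;    /* rq is now charged to the shared queue */
            old->allocated--;
            shared->ref++;          /* rq takes a reference on its new queue */
            old->ref--;             /* dropped from the old one (bfq_put_queue) */
    }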
@@ -6425,7 +7139,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                 }
         }
 
-        spin_unlock_irq(&bfqd->lock);
+        bfq_unlock_put_ioc(bfqd);
 }
 
 static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
@@ -6576,7 +7290,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
                 bfq_completed_request(bfqq, bfqd);
                 bfq_put_rq_priv_body(bfqq);
 
-                spin_unlock_irqrestore(&bfqd->lock, flags);
+                bfq_unlock_put_ioc_restore(bfqd, flags);
         } else {
                 /*
                  * Request rq may be still/already in the scheduler,
@@ -6600,6 +7314,55 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
 }
 
 /*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+ * was the last process referring to that bfqq.
+ */
+static struct bfq_queue *
+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+{
+        bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
+
+        if (bfqq_process_refs(bfqq) == 1) {
+                bfqq->pid = current->pid;
+                bfq_clear_bfqq_coop(bfqq);
+                bfq_clear_bfqq_split_coop(bfqq);
+                return bfqq;
+        }
+
+        bic_set_bfqq(bic, NULL, 1);
+
+        bfq_put_cooperator(bfqq);
+
+        bfq_put_queue(bfqq);
+        return NULL;
+}
+
+static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
+                                                   struct bfq_io_cq *bic,
+                                                   struct bio *bio,
+                                                   bool split, bool is_sync,
+                                                   bool *new_queue)
+{
+        struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+
+        if (likely(bfqq && bfqq != &bfqd->oom_bfqq))
+                return bfqq;
+
+        if (new_queue)
+                *new_queue = true;
+
+        if (bfqq)
+                bfq_put_queue(bfqq);
+        bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
+
+        bic_set_bfqq(bic, bfqq, is_sync);
+        if (split && is_sync)
+                bfqq->split_time = jiffies;
+
+        return bfqq;
+}
+
+/*
  * Allocate bfq data structures associated with this request.
  */
 static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
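bfq_split_bfqq() has exactly two outcomes, which this condensed restatement (illustrative) makes explicit: a queue down to its last process reference is simply de-flagged and reused, while a still-shared queue is detached from the caller so that bfq_get_bfqq_handle_split() can allocate a fresh private queue:

    struct toy_bfqq {
            int ref, allocated, on_st;      /* as in toy_process_refs() */
            int coop, split_coop;           /* the two new state flags */
    };

    static int toy_process_refs(const struct toy_bfqq *q)
    {
            return q->ref - q->allocated - q->on_st;
    }

    /* Returns the queue to keep using, or NULL if a new one is needed. */
    static struct toy_bfqq *toy_split(struct toy_bfqq *q)
    {
            if (toy_process_refs(q) == 1) {
                    q->coop = 0;            /* no longer shared ... */
                    q->split_coop = 0;      /* ... and no split pending */
                    return q;               /* recycle as a private queue */
            }
            /* still shared: detach; the caller allocates a new queue */
            return NULL;
    }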
@@ -6609,6 +7372,7 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
         struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
         const int is_sync = rq_is_sync(rq);
         struct bfq_queue *bfqq;
+        bool new_queue = false;
 
         spin_lock_irq(&bfqd->lock);
 
@@ -6619,12 +7383,28 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 
         bfq_bic_update_cgroup(bic, bio);
 
-        bfqq = bic_to_bfqq(bic, is_sync);
-        if (!bfqq || bfqq == &bfqd->oom_bfqq) {
-                if (bfqq)
-                        bfq_put_queue(bfqq);
-                bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
-                bic_set_bfqq(bic, bfqq, is_sync);
+        bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync,
+                                         &new_queue);
+
+        if (likely(!new_queue)) {
+                /* If the queue was seeky for too long, break it apart. */
+                if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+                        bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+                        bfqq = bfq_split_bfqq(bic, bfqq);
+                        /*
+                         * A reference to bic->icq.ioc needs to be
+                         * released after a queue split. Do not do it
+                         * immediately, so as not to risk taking an
+                         * ioc->lock while holding the scheduler
+                         * lock.
+                         */
+                        bfqd->ioc_to_put = bic->icq.ioc;
+
+                        if (!bfqq)
+                                bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
+                                                                 true, is_sync,
+                                                                 NULL);
+                }
         }
 
         bfqq->allocated++;
@@ -6635,7 +7415,25 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
         rq->elv.priv[0] = bic;
         rq->elv.priv[1] = bfqq;
 
-        spin_unlock_irq(&bfqd->lock);
+        /*
+         * If a bfq_queue has only one process reference, it is owned
+         * by only this bic: we can then set bfqq->bic = bic. In
+         * addition, if the queue has also just been split, we have to
+         * resume its state.
+         */
+        if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+                bfqq->bic = bic;
+                if (bfqd->ioc_to_put) { /* if true, there has been a split */
+                        /*
+                         * The queue has just been split from a shared
+                         * queue: restore the idle window and the
+                         * possible weight raising period.
+                         */
+                        bfq_bfqq_resume_state(bfqq, bic);
+                }
+        }
+
+        bfq_unlock_put_ioc(bfqd);
 
         return 0;
 
@@ -6680,7 +7478,7 @@ static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
         bfq_bfqq_expire(bfqd, bfqq, true, reason);
 
 schedule_dispatch:
-        spin_unlock_irqrestore(&bfqd->lock, flags);
+        bfq_unlock_put_ioc_restore(bfqd, flags);
         bfq_schedule_dispatch(bfqd);
 }
 
@@ -6777,6 +7575,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
         root_group->my_entity = NULL;
         root_group->bfqd = bfqd;
 #endif
+        root_group->rq_pos_tree = RB_ROOT;
         for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
                 root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
         root_group->sched_data.bfq_class_idle_last_service = jiffies;