-rw-r--r--  block/bfq-iosched.c | 881
1 files changed, 840 insertions, 41 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index deb1f21c535f..6e7388a1d220 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -281,11 +281,12 @@ struct bfq_ttime {
281 * struct bfq_queue - leaf schedulable entity. 281 * struct bfq_queue - leaf schedulable entity.
282 * 282 *
283 * A bfq_queue is a leaf request queue; it can be associated with an 283 * A bfq_queue is a leaf request queue; it can be associated with an
284 * io_context or more, if it is async. @cgroup holds a reference to 284 * io_context or more, if it is async or shared between cooperating
285 * the cgroup, to be sure that it does not disappear while a bfqq 285 * processes. @cgroup holds a reference to the cgroup, to be sure that it
286 * still references it (mostly to avoid races between request issuing 286 * does not disappear while a bfqq still references it (mostly to avoid
287 * and task migration followed by cgroup destruction). All the fields 287 * races between request issuing and task migration followed by cgroup
288 * are protected by the queue lock of the containing bfqd. 288 * destruction).
289 * All the fields are protected by the queue lock of the containing bfqd.
289 */ 290 */
290struct bfq_queue { 291struct bfq_queue {
291 /* reference counter */ 292 /* reference counter */
@@ -298,6 +299,16 @@ struct bfq_queue {
298 /* next ioprio and ioprio class if a change is in progress */ 299 /* next ioprio and ioprio class if a change is in progress */
299 unsigned short new_ioprio, new_ioprio_class; 300 unsigned short new_ioprio, new_ioprio_class;
300 301
302 /*
303 * Shared bfq_queue if queue is cooperating with one or more
304 * other queues.
305 */
306 struct bfq_queue *new_bfqq;
307 /* request-position tree member (see bfq_group's @rq_pos_tree) */
308 struct rb_node pos_node;
309 /* request-position tree root (see bfq_group's @rq_pos_tree) */
310 struct rb_root *pos_root;
311
301 /* sorted list of pending requests */ 312 /* sorted list of pending requests */
302 struct rb_root sort_list; 313 struct rb_root sort_list;
303 /* if fifo isn't expired, next request to serve */ 314 /* if fifo isn't expired, next request to serve */
@@ -347,6 +358,12 @@ struct bfq_queue {
347 /* pid of the process owning the queue, used for logging purposes */ 358 /* pid of the process owning the queue, used for logging purposes */
348 pid_t pid; 359 pid_t pid;
349 360
361 /*
362 * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
363 * if the queue is shared.
364 */
365 struct bfq_io_cq *bic;
366
350 /* current maximum weight-raising time for this queue */ 367 /* current maximum weight-raising time for this queue */
351 unsigned long wr_cur_max_time; 368 unsigned long wr_cur_max_time;
352 /* 369 /*
@@ -375,10 +392,13 @@ struct bfq_queue {
375 * last transition from idle to backlogged. 392 * last transition from idle to backlogged.
376 */ 393 */
377 unsigned long service_from_backlogged; 394 unsigned long service_from_backlogged;
395
378 /* 396 /*
379 * Value of wr start time when switching to soft rt 397 * Value of wr start time when switching to soft rt
380 */ 398 */
381 unsigned long wr_start_at_switch_to_srt; 399 unsigned long wr_start_at_switch_to_srt;
400
401 unsigned long split_time; /* time of last split */
382}; 402};
383 403
384/** 404/**
@@ -394,6 +414,26 @@ struct bfq_io_cq {
394#ifdef CONFIG_BFQ_GROUP_IOSCHED 414#ifdef CONFIG_BFQ_GROUP_IOSCHED
395 uint64_t blkcg_serial_nr; /* the current blkcg serial */ 415 uint64_t blkcg_serial_nr; /* the current blkcg serial */
396#endif 416#endif
417 /*
418 * Snapshot of the idle window before merging; taken to
419 * remember this value while the queue is merged, so as to be
420 * able to restore it in case of split.
421 */
422 bool saved_idle_window;
423 /*
 424	 * Same purpose as the previous field, but for the I/O-bound
425 * classification of a queue.
426 */
427 bool saved_IO_bound;
428
429 /*
 430	 * Similar to the previous fields: save weight-raising information.
431 */
432 unsigned long saved_wr_coeff;
433 unsigned long saved_last_wr_start_finish;
434 unsigned long saved_wr_start_at_switch_to_srt;
435 unsigned int saved_wr_cur_max_time;
436 struct bfq_ttime saved_ttime;
397}; 437};
398 438
399enum bfq_device_speed { 439enum bfq_device_speed {
@@ -584,6 +624,15 @@ struct bfq_data {
584 struct bfq_io_cq *bio_bic; 624 struct bfq_io_cq *bio_bic;
585 /* bfqq associated with the task issuing current bio for merging */ 625 /* bfqq associated with the task issuing current bio for merging */
586 struct bfq_queue *bio_bfqq; 626 struct bfq_queue *bio_bfqq;
627
628 /*
629 * io context to put right after bfqd->lock is released. This
 630	 * field is used to perform put_io_context, when needed, only
 631	 * after the scheduler lock has been released, so as to
 632	 * prevent an ioc->lock from possibly being taken while the
 633	 * scheduler lock is still held.
634 */
635 struct io_context *ioc_to_put;
587}; 636};
588 637
589enum bfqq_state_flags { 638enum bfqq_state_flags {
@@ -605,6 +654,8 @@ enum bfqq_state_flags {
605 * may need softrt-next-start 654 * may need softrt-next-start
606 * update 655 * update
607 */ 656 */
657 BFQQF_coop, /* bfqq is shared */
658 BFQQF_split_coop /* shared bfqq will be split */
608}; 659};
609 660
610#define BFQ_BFQQ_FNS(name) \ 661#define BFQ_BFQQ_FNS(name) \
@@ -628,6 +679,8 @@ BFQ_BFQQ_FNS(fifo_expire);
628BFQ_BFQQ_FNS(idle_window); 679BFQ_BFQQ_FNS(idle_window);
629BFQ_BFQQ_FNS(sync); 680BFQ_BFQQ_FNS(sync);
630BFQ_BFQQ_FNS(IO_bound); 681BFQ_BFQQ_FNS(IO_bound);
682BFQ_BFQQ_FNS(coop);
683BFQ_BFQQ_FNS(split_coop);
631BFQ_BFQQ_FNS(softrt_update); 684BFQ_BFQQ_FNS(softrt_update);
632#undef BFQ_BFQQ_FNS 685#undef BFQ_BFQQ_FNS
633 686
@@ -738,6 +791,9 @@ struct bfq_group_data {
738 * to avoid too many special cases during group creation/ 791 * to avoid too many special cases during group creation/
739 * migration. 792 * migration.
740 * @stats: stats for this bfqg. 793 * @stats: stats for this bfqg.
794 * @rq_pos_tree: rbtree sorted by next_request position, used when
795 * determining if two or more queues have interleaving
796 * requests (see bfq_find_close_cooperator()).
741 * 797 *
742 * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup 798 * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
743 * there is a set of bfq_groups, each one collecting the lower-level 799 * there is a set of bfq_groups, each one collecting the lower-level
@@ -762,6 +818,8 @@ struct bfq_group {
762 818
763 struct bfq_entity *my_entity; 819 struct bfq_entity *my_entity;
764 820
821 struct rb_root rq_pos_tree;
822
765 struct bfqg_stats stats; 823 struct bfqg_stats stats;
766}; 824};
767 825
@@ -811,6 +869,27 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
811 return bic->icq.q->elevator->elevator_data; 869 return bic->icq.q->elevator->elevator_data;
812} 870}
813 871
872#ifdef CONFIG_BFQ_GROUP_IOSCHED
873
874static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
875{
876 struct bfq_entity *group_entity = bfqq->entity.parent;
877
878 if (!group_entity)
879 group_entity = &bfqq->bfqd->root_group->entity;
880
881 return container_of(group_entity, struct bfq_group, entity);
882}
883
884#else
885
886static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
887{
888 return bfqq->bfqd->root_group;
889}
890
891#endif
892
814static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio); 893static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
815static void bfq_put_queue(struct bfq_queue *bfqq); 894static void bfq_put_queue(struct bfq_queue *bfqq);
816static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, 895static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
@@ -975,6 +1054,34 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd)
975 } 1054 }
976} 1055}
977 1056
1057/*
1058 * Next two functions release bfqd->lock and put the io context
1060 * pointed to by bfqd->ioc_to_put. This delayed put avoids the risk of
1061 * taking an ioc->lock while the scheduler lock is held.
1061 */
1062static void bfq_unlock_put_ioc(struct bfq_data *bfqd)
1063{
1064 struct io_context *ioc_to_put = bfqd->ioc_to_put;
1065
1066 bfqd->ioc_to_put = NULL;
1067 spin_unlock_irq(&bfqd->lock);
1068
1069 if (ioc_to_put)
1070 put_io_context(ioc_to_put);
1071}
1072
1073static void bfq_unlock_put_ioc_restore(struct bfq_data *bfqd,
1074 unsigned long flags)
1075{
1076 struct io_context *ioc_to_put = bfqd->ioc_to_put;
1077
1078 bfqd->ioc_to_put = NULL;
1079 spin_unlock_irqrestore(&bfqd->lock, flags);
1080
1081 if (ioc_to_put)
1082 put_io_context(ioc_to_put);
1083}
1084
978/** 1085/**
979 * bfq_gt - compare two timestamps. 1086 * bfq_gt - compare two timestamps.
980 * @a: first ts. 1087 * @a: first ts.
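
An illustrative user-space sketch of the deferred-put scheme implemented by the two helpers above. All names here (struct ctx, struct sched, sched_unlock_put()) are hypothetical and exist only for this example (build with -lpthread); the only point it demonstrates is that the object to release is recorded while the scheduler-like lock is held, and the release, which takes the object's own lock, runs only after that lock has been dropped.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct io_context: its own lock plus a refcount. */
struct ctx {
	pthread_mutex_t lock;
	int refcount;
};

/* Stand-in for struct bfq_data: the "scheduler" lock plus the deferred pointer. */
struct sched {
	pthread_mutex_t lock;
	struct ctx *to_put;	/* analogue of bfqd->ioc_to_put */
};

/* Dropping the last reference takes ctx->lock, so it must not run under sched->lock. */
static void ctx_put(struct ctx *c)
{
	int free_it;

	pthread_mutex_lock(&c->lock);
	free_it = (--c->refcount == 0);
	pthread_mutex_unlock(&c->lock);
	if (free_it)
		free(c);
}

/* Analogue of bfq_unlock_put_ioc(): clear the pointer, unlock, then put. */
static void sched_unlock_put(struct sched *s)
{
	struct ctx *c = s->to_put;

	s->to_put = NULL;
	pthread_mutex_unlock(&s->lock);
	if (c)
		ctx_put(c);
}

int main(void)
{
	struct sched s = { .lock = PTHREAD_MUTEX_INITIALIZER, .to_put = NULL };
	struct ctx *c = calloc(1, sizeof(*c));

	pthread_mutex_init(&c->lock, NULL);
	c->refcount = 1;

	pthread_mutex_lock(&s.lock);
	s.to_put = c;			/* schedule the put while holding the lock */
	sched_unlock_put(&s);		/* the put runs only after the unlock */

	printf("deferred put completed\n");
	return 0;
}
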
@@ -2425,7 +2532,14 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
2425 struct bfq_entity *entity = in_serv_entity; 2532 struct bfq_entity *entity = in_serv_entity;
2426 2533
2427 if (bfqd->in_service_bic) { 2534 if (bfqd->in_service_bic) {
2428 put_io_context(bfqd->in_service_bic->icq.ioc); 2535 /*
2536 * Schedule the release of a reference to
 2537		 * bfqd->in_service_bic->icq.ioc until right after the
 2538		 * scheduler lock is released. This ioc is not
 2539		 * released immediately, so as not to risk taking
 2540		 * an ioc->lock while holding the scheduler lock.
2541 */
2542 bfqd->ioc_to_put = bfqd->in_service_bic->icq.ioc;
2429 bfqd->in_service_bic = NULL; 2543 bfqd->in_service_bic = NULL;
2430 } 2544 }
2431 2545
@@ -2914,6 +3028,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
2914 * in bfq_init_queue() 3028 * in bfq_init_queue()
2915 */ 3029 */
2916 bfqg->bfqd = bfqd; 3030 bfqg->bfqd = bfqd;
3031 bfqg->rq_pos_tree = RB_ROOT;
2917} 3032}
2918 3033
2919static void bfq_pd_free(struct blkg_policy_data *pd) 3034static void bfq_pd_free(struct blkg_policy_data *pd)
@@ -2982,6 +3097,8 @@ static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
2982 return bfqg; 3097 return bfqg;
2983} 3098}
2984 3099
3100static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
3101 struct bfq_queue *bfqq);
2985static void bfq_bfqq_expire(struct bfq_data *bfqd, 3102static void bfq_bfqq_expire(struct bfq_data *bfqd,
2986 struct bfq_queue *bfqq, 3103 struct bfq_queue *bfqq,
2987 bool compensate, 3104 bool compensate,
@@ -3030,8 +3147,10 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
3030 entity->sched_data = &bfqg->sched_data; 3147 entity->sched_data = &bfqg->sched_data;
3031 bfqg_get(bfqg); 3148 bfqg_get(bfqg);
3032 3149
3033 if (bfq_bfqq_busy(bfqq)) 3150 if (bfq_bfqq_busy(bfqq)) {
3151 bfq_pos_tree_add_move(bfqd, bfqq);
3034 bfq_activate_bfqq(bfqd, bfqq); 3152 bfq_activate_bfqq(bfqd, bfqq);
3153 }
3035 3154
3036 if (!bfqd->in_service_queue && !bfqd->rq_in_driver) 3155 if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
3037 bfq_schedule_dispatch(bfqd); 3156 bfq_schedule_dispatch(bfqd);
@@ -3071,8 +3190,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
3071 bic_set_bfqq(bic, NULL, 0); 3190 bic_set_bfqq(bic, NULL, 0);
3072 bfq_log_bfqq(bfqd, async_bfqq, 3191 bfq_log_bfqq(bfqd, async_bfqq,
3073 "bic_change_group: %p %d", 3192 "bic_change_group: %p %d",
3074 async_bfqq, 3193 async_bfqq, async_bfqq->ref);
3075 async_bfqq->ref);
3076 bfq_put_queue(async_bfqq); 3194 bfq_put_queue(async_bfqq);
3077 } 3195 }
3078 } 3196 }
@@ -3214,7 +3332,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
3214 __bfq_deactivate_entity(entity, false); 3332 __bfq_deactivate_entity(entity, false);
3215 bfq_put_async_queues(bfqd, bfqg); 3333 bfq_put_async_queues(bfqd, bfqg);
3216 3334
3217 spin_unlock_irqrestore(&bfqd->lock, flags); 3335 bfq_unlock_put_ioc_restore(bfqd, flags);
3218 /* 3336 /*
3219 * @blkg is going offline and will be ignored by 3337 * @blkg is going offline and will be ignored by
3220 * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so 3338 * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
@@ -3731,6 +3849,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
3731 } 3849 }
3732} 3850}
3733 3851
3852static struct bfq_queue *
3853bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
3854 sector_t sector, struct rb_node **ret_parent,
3855 struct rb_node ***rb_link)
3856{
3857 struct rb_node **p, *parent;
3858 struct bfq_queue *bfqq = NULL;
3859
3860 parent = NULL;
3861 p = &root->rb_node;
3862 while (*p) {
3863 struct rb_node **n;
3864
3865 parent = *p;
3866 bfqq = rb_entry(parent, struct bfq_queue, pos_node);
3867
3868 /*
3869 * Sort strictly based on sector. Smallest to the left,
3870 * largest to the right.
3871 */
3872 if (sector > blk_rq_pos(bfqq->next_rq))
3873 n = &(*p)->rb_right;
3874 else if (sector < blk_rq_pos(bfqq->next_rq))
3875 n = &(*p)->rb_left;
3876 else
3877 break;
3878 p = n;
3879 bfqq = NULL;
3880 }
3881
3882 *ret_parent = parent;
3883 if (rb_link)
3884 *rb_link = p;
3885
3886 bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
3887 (unsigned long long)sector,
3888 bfqq ? bfqq->pid : 0);
3889
3890 return bfqq;
3891}
3892
3893static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
3894{
3895 struct rb_node **p, *parent;
3896 struct bfq_queue *__bfqq;
3897
3898 if (bfqq->pos_root) {
3899 rb_erase(&bfqq->pos_node, bfqq->pos_root);
3900 bfqq->pos_root = NULL;
3901 }
3902
3903 if (bfq_class_idle(bfqq))
3904 return;
3905 if (!bfqq->next_rq)
3906 return;
3907
3908 bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
3909 __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
3910 blk_rq_pos(bfqq->next_rq), &parent, &p);
3911 if (!__bfqq) {
3912 rb_link_node(&bfqq->pos_node, parent, p);
3913 rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
3914 } else
3915 bfqq->pos_root = NULL;
3916}
3917
3734/* 3918/*
3735 * Return expired entry, or NULL to just start from scratch in rbtree. 3919 * Return expired entry, or NULL to just start from scratch in rbtree.
3736 */ 3920 */
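
A simplified sketch, using hypothetical types, of what the rq_pos_tree lookup above provides: queues kept sorted by the position of their next request, so that a queue issuing I/O near a given sector can be located in logarithmic time. A sorted array plus binary search stands in for the rbtree; as in bfqq_find_close() later in this patch, the caller may still need to examine the neighbour on the other side of the returned candidate.

#include <stdio.h>

typedef unsigned long long sector_t;

struct queue {
	int pid;
	sector_t next_rq_pos;	/* position of this queue's next request */
};

/*
 * Sorted-array analogue of bfq_rq_pos_tree_lookup(): return an exact
 * match if there is one, otherwise the last element visited (the
 * analogue of the parent of the NULL leaf in the rbtree walk).
 */
static const struct queue *lookup_near(const struct queue *q, int n,
				       sector_t sector)
{
	int lo = 0, hi = n - 1, last = -1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;

		last = mid;
		if (q[mid].next_rq_pos == sector)
			return &q[mid];
		if (q[mid].next_rq_pos < sector)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return last >= 0 ? &q[last] : NULL;
}

int main(void)
{
	/* queues kept sorted by next request position */
	const struct queue qs[] = {
		{ 101, 1000 }, { 102, 5000 }, { 103, 9000 },
	};
	const struct queue *q = lookup_near(qs, 3, 8000);

	if (q)
		printf("candidate: pid %d, next request at %llu\n",
		       q->pid, q->next_rq_pos);
	return 0;
}
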
@@ -3837,6 +4021,43 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
3837 } 4021 }
3838} 4022}
3839 4023
4024static void
4025bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
4026{
4027 if (bic->saved_idle_window)
4028 bfq_mark_bfqq_idle_window(bfqq);
4029 else
4030 bfq_clear_bfqq_idle_window(bfqq);
4031
4032 if (bic->saved_IO_bound)
4033 bfq_mark_bfqq_IO_bound(bfqq);
4034 else
4035 bfq_clear_bfqq_IO_bound(bfqq);
4036
4037 bfqq->ttime = bic->saved_ttime;
4038 bfqq->wr_coeff = bic->saved_wr_coeff;
4039 bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
4040 bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
4041 bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
4042
4043 if (bfqq->wr_coeff > 1 &&
4044 time_is_before_jiffies(bfqq->last_wr_start_finish +
4045 bfqq->wr_cur_max_time)) {
4046 bfq_log_bfqq(bfqq->bfqd, bfqq,
4047 "resume state: switching off wr");
4048
4049 bfqq->wr_coeff = 1;
4050 }
4051
4052 /* make sure weight will be updated, however we got here */
4053 bfqq->entity.prio_changed = 1;
4054}
4055
4056static int bfqq_process_refs(struct bfq_queue *bfqq)
4057{
4058 return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
4059}
4060
3840static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) 4061static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
3841{ 4062{
3842 struct bfq_entity *entity = &bfqq->entity; 4063 struct bfq_entity *entity = &bfqq->entity;
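
A toy example, with made-up numbers, of the bfqq_process_refs() arithmetic above: as I read it, the references pinned by allocated-but-not-completed requests and the reference held while the queue's entity is on a service tree are subtracted from the total reference count, leaving the references held by processes (i.e., by bics).

#include <stdio.h>

int main(void)
{
	int process_refs = 2;	/* e.g. two bics currently point to the queue */
	int allocated = 3;	/* requests allocated against the queue */
	int on_st = 1;		/* 1 while the queue's entity is on a service tree */
	int ref = process_refs + allocated + on_st;	/* total, like bfqq->ref */

	/* same expression as bfqq_process_refs() */
	printf("process refs = %d\n", ref - allocated - on_st);	/* prints 2 */
	return 0;
}
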
@@ -4157,14 +4378,16 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
4157 /* 4378 /*
4158 * bfqq deserves to be weight-raised if: 4379 * bfqq deserves to be weight-raised if:
4159 * - it is sync, 4380 * - it is sync,
4160 * - it has been idle for enough time or is soft real-time. 4381 * - it has been idle for enough time or is soft real-time,
 4382	 * - it is linked to a bfq_io_cq (i.e., it is not shared in any sense).
4161 */ 4383 */
4162 soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && 4384 soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
4163 time_is_before_jiffies(bfqq->soft_rt_next_start); 4385 time_is_before_jiffies(bfqq->soft_rt_next_start);
4164 *interactive = idle_for_long_time; 4386 *interactive = idle_for_long_time;
4165 wr_or_deserves_wr = bfqd->low_latency && 4387 wr_or_deserves_wr = bfqd->low_latency &&
4166 (bfqq->wr_coeff > 1 || 4388 (bfqq->wr_coeff > 1 ||
4167 (bfq_bfqq_sync(bfqq) && (*interactive || soft_rt))); 4389 (bfq_bfqq_sync(bfqq) &&
4390 bfqq->bic && (*interactive || soft_rt)));
4168 4391
4169 /* 4392 /*
4170 * Using the last flag, update budget and check whether bfqq 4393 * Using the last flag, update budget and check whether bfqq
@@ -4186,14 +4409,22 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
4186 } 4409 }
4187 4410
4188 if (bfqd->low_latency) { 4411 if (bfqd->low_latency) {
4189 bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq, 4412 if (unlikely(time_is_after_jiffies(bfqq->split_time)))
4190 old_wr_coeff, 4413 /* wraparound */
4191 wr_or_deserves_wr, 4414 bfqq->split_time =
4192 *interactive, 4415 jiffies - bfqd->bfq_wr_min_idle_time - 1;
4193 soft_rt); 4416
4194 4417 if (time_is_before_jiffies(bfqq->split_time +
4195 if (old_wr_coeff != bfqq->wr_coeff) 4418 bfqd->bfq_wr_min_idle_time)) {
4196 bfqq->entity.prio_changed = 1; 4419 bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
4420 old_wr_coeff,
4421 wr_or_deserves_wr,
4422 *interactive,
4423 soft_rt);
4424
4425 if (old_wr_coeff != bfqq->wr_coeff)
4426 bfqq->entity.prio_changed = 1;
4427 }
4197 } 4428 }
4198 4429
4199 bfqq->last_idle_bklogged = jiffies; 4430 bfqq->last_idle_bklogged = jiffies;
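
A small user-space sketch of the wraparound-safe time test used in the hunk above. The helper time_before() here is a stand-in for the kernel's time_before()/time_is_before_jiffies() family: comparing via a signed difference of unsigned counters keeps the test correct across jiffies wraparound, which is also why split_time is pushed back into the past when it appears to lie in the future.

#include <stdio.h>

static unsigned long jiffies;	/* stand-in for the kernel's jiffies counter */

/* true if time a is before time b, wraparound-safe */
static int time_before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

int main(void)
{
	unsigned long wr_min_idle_time = 100;
	unsigned long split_time;

	jiffies = 10;				/* counter recently wrapped */
	split_time = (unsigned long)-20;	/* recorded just before the wrap */

	/* split happened 30 ticks ago, less than 100: prints 0 (not yet) */
	printf("%d\n", time_before(split_time + wr_min_idle_time, jiffies));

	jiffies += 200;				/* much later */
	/* now more than 100 ticks have passed: prints 1 */
	printf("%d\n", time_before(split_time + wr_min_idle_time, jiffies));
	return 0;
}
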
@@ -4240,6 +4471,12 @@ static void bfq_add_request(struct request *rq)
4240 next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); 4471 next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
4241 bfqq->next_rq = next_rq; 4472 bfqq->next_rq = next_rq;
4242 4473
4474 /*
4475 * Adjust priority tree position, if next_rq changes.
4476 */
4477 if (prev != bfqq->next_rq)
4478 bfq_pos_tree_add_move(bfqd, bfqq);
4479
4243 if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ 4480 if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
4244 bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff, 4481 bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
4245 rq, &interactive); 4482 rq, &interactive);
@@ -4368,6 +4605,14 @@ static void bfq_remove_request(struct request_queue *q,
4368 */ 4605 */
4369 bfqq->entity.budget = bfqq->entity.service = 0; 4606 bfqq->entity.budget = bfqq->entity.service = 0;
4370 } 4607 }
4608
4609 /*
4610 * Remove queue from request-position tree as it is empty.
4611 */
4612 if (bfqq->pos_root) {
4613 rb_erase(&bfqq->pos_node, bfqq->pos_root);
4614 bfqq->pos_root = NULL;
4615 }
4371 } 4616 }
4372 4617
4373 if (rq->cmd_flags & REQ_META) 4618 if (rq->cmd_flags & REQ_META)
@@ -4445,11 +4690,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
4445 bfqd->last_position); 4690 bfqd->last_position);
4446 bfqq->next_rq = next_rq; 4691 bfqq->next_rq = next_rq;
4447 /* 4692 /*
4448 * If next_rq changes, update the queue's budget to fit 4693 * If next_rq changes, update both the queue's budget to
4449 * the new request. 4694 * fit the new request and the queue's position in its
4695 * rq_pos_tree.
4450 */ 4696 */
4451 if (prev != bfqq->next_rq) 4697 if (prev != bfqq->next_rq) {
4452 bfq_updated_next_req(bfqd, bfqq); 4698 bfq_updated_next_req(bfqd, bfqq);
4699 bfq_pos_tree_add_move(bfqd, bfqq);
4700 }
4453 } 4701 }
4454} 4702}
4455 4703
@@ -4532,12 +4780,364 @@ static void bfq_end_wr(struct bfq_data *bfqd)
4532 spin_unlock_irq(&bfqd->lock); 4780 spin_unlock_irq(&bfqd->lock);
4533} 4781}
4534 4782
4783static sector_t bfq_io_struct_pos(void *io_struct, bool request)
4784{
4785 if (request)
4786 return blk_rq_pos(io_struct);
4787 else
4788 return ((struct bio *)io_struct)->bi_iter.bi_sector;
4789}
4790
4791static int bfq_rq_close_to_sector(void *io_struct, bool request,
4792 sector_t sector)
4793{
4794 return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
4795 BFQQ_CLOSE_THR;
4796}
4797
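
A tiny worked example of the closeness test just defined. BFQQ_CLOSE_THR is defined elsewhere in this file; the threshold used below is only a placeholder chosen to make the arithmetic concrete.

#include <stdio.h>

typedef unsigned long long sector_t;

#define CLOSE_THR ((sector_t)(8 * 1024))	/* placeholder threshold, in sectors */

static int close_to_sector(sector_t pos, sector_t sector)
{
	/* same shape as bfq_rq_close_to_sector(): absolute-distance check */
	sector_t dist = pos > sector ? pos - sector : sector - pos;

	return dist <= CLOSE_THR;
}

int main(void)
{
	sector_t last_position = 1000000;

	printf("%d\n", close_to_sector(1004096, last_position));	/* 1: 4096 away */
	printf("%d\n", close_to_sector(1020000, last_position));	/* 0: 20000 away */
	return 0;
}
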
4798static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
4799 struct bfq_queue *bfqq,
4800 sector_t sector)
4801{
4802 struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
4803 struct rb_node *parent, *node;
4804 struct bfq_queue *__bfqq;
4805
4806 if (RB_EMPTY_ROOT(root))
4807 return NULL;
4808
4809 /*
4810 * First, if we find a request starting at the end of the last
4811 * request, choose it.
4812 */
4813 __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
4814 if (__bfqq)
4815 return __bfqq;
4816
4817 /*
4818 * If the exact sector wasn't found, the parent of the NULL leaf
4819 * will contain the closest sector (rq_pos_tree sorted by
4820 * next_request position).
4821 */
4822 __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
4823 if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
4824 return __bfqq;
4825
4826 if (blk_rq_pos(__bfqq->next_rq) < sector)
4827 node = rb_next(&__bfqq->pos_node);
4828 else
4829 node = rb_prev(&__bfqq->pos_node);
4830 if (!node)
4831 return NULL;
4832
4833 __bfqq = rb_entry(node, struct bfq_queue, pos_node);
4834 if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
4835 return __bfqq;
4836
4837 return NULL;
4838}
4839
4840static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
4841 struct bfq_queue *cur_bfqq,
4842 sector_t sector)
4843{
4844 struct bfq_queue *bfqq;
4845
4846 /*
4847 * We shall notice if some of the queues are cooperating,
4848 * e.g., working closely on the same area of the device. In
4849 * that case, we can group them together and: 1) don't waste
4850 * time idling, and 2) serve the union of their requests in
4851 * the best possible order for throughput.
4852 */
4853 bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
4854 if (!bfqq || bfqq == cur_bfqq)
4855 return NULL;
4856
4857 return bfqq;
4858}
4859
4860static struct bfq_queue *
4861bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
4862{
4863 int process_refs, new_process_refs;
4864 struct bfq_queue *__bfqq;
4865
4866 /*
4867 * If there are no process references on the new_bfqq, then it is
4868 * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
4869 * may have dropped their last reference (not just their last process
4870 * reference).
4871 */
4872 if (!bfqq_process_refs(new_bfqq))
4873 return NULL;
4874
4875 /* Avoid a circular list and skip interim queue merges. */
4876 while ((__bfqq = new_bfqq->new_bfqq)) {
4877 if (__bfqq == bfqq)
4878 return NULL;
4879 new_bfqq = __bfqq;
4880 }
4881
4882 process_refs = bfqq_process_refs(bfqq);
4883 new_process_refs = bfqq_process_refs(new_bfqq);
4884 /*
4885 * If the process for the bfqq has gone away, there is no
4886 * sense in merging the queues.
4887 */
4888 if (process_refs == 0 || new_process_refs == 0)
4889 return NULL;
4890
4891 bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
4892 new_bfqq->pid);
4893
4894 /*
4895 * Merging is just a redirection: the requests of the process
4896 * owning one of the two queues are redirected to the other queue.
4897 * The latter queue, in its turn, is set as shared if this is the
4898 * first time that the requests of some process are redirected to
4899 * it.
4900 *
4901 * We redirect bfqq to new_bfqq and not the opposite, because we
4902 * are in the context of the process owning bfqq, hence we have
4903 * the io_cq of this process. So we can immediately configure this
4904 * io_cq to redirect the requests of the process to new_bfqq.
4905 *
4906 * NOTE, even if new_bfqq coincides with the in-service queue, the
4907 * io_cq of new_bfqq is not available, because, if the in-service
4908 * queue is shared, bfqd->in_service_bic may not point to the
4909 * io_cq of the in-service queue.
4910 * Redirecting the requests of the process owning bfqq to the
4911 * currently in-service queue is in any case the best option, as
4912 * we feed the in-service queue with new requests close to the
4913 * last request served and, by doing so, hopefully increase the
4914 * throughput.
4915 */
4916 bfqq->new_bfqq = new_bfqq;
4917 new_bfqq->ref += process_refs;
4918 return new_bfqq;
4919}
4920
4921static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
4922 struct bfq_queue *new_bfqq)
4923{
4924 if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
4925 (bfqq->ioprio_class != new_bfqq->ioprio_class))
4926 return false;
4927
4928 /*
4929 * If either of the queues has already been detected as seeky,
4930 * then merging it with the other queue is unlikely to lead to
4931 * sequential I/O.
4932 */
4933 if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
4934 return false;
4935
4936 /*
4937 * Interleaved I/O is known to be done by (some) applications
4938 * only for reads, so it does not make sense to merge async
4939 * queues.
4940 */
4941 if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
4942 return false;
4943
4944 return true;
4945}
4946
4947/*
4948 * If this function returns true, then bfqq cannot be merged. The idea
4949 * is that true cooperation happens very early after processes start
4950 * to do I/O. Usually, late cooperations are just accidental false
4951 * positives. In case bfqq is weight-raised, such false positives
4952 * would evidently degrade latency guarantees for bfqq.
4953 */
4954static bool wr_from_too_long(struct bfq_queue *bfqq)
4955{
4956 return bfqq->wr_coeff > 1 &&
4957 time_is_before_jiffies(bfqq->last_wr_start_finish +
4958 msecs_to_jiffies(100));
4959}
4960
4961/*
4962 * Attempt to schedule a merge of bfqq with the currently in-service
4963 * queue or with a close queue among the scheduled queues. Return
4964 * NULL if no merge was scheduled, a pointer to the shared bfq_queue
4965 * structure otherwise.
4966 *
 4967 * The OOM queue is not allowed to participate in cooperation: in fact, since
4968 * the requests temporarily redirected to the OOM queue could be redirected
4969 * again to dedicated queues at any time, the state needed to correctly
4970 * handle merging with the OOM queue would be quite complex and expensive
 4971 * to maintain. Besides, in a condition as critical as an out-of-memory state,
 4972 * the benefits of queue merging may be of little relevance, or even negligible.
4973 *
4974 * Weight-raised queues can be merged only if their weight-raising
4975 * period has just started. In fact cooperating processes are usually
4976 * started together. Thus, with this filter we avoid false positives
4977 * that would jeopardize low-latency guarantees.
4978 *
4979 * WARNING: queue merging may impair fairness among non-weight raised
4980 * queues, for at least two reasons: 1) the original weight of a
 4981 * merged queue may change during the merged state, 2) even if the
 4982 * weight stays the same, a merged queue may be bloated with many more
4983 * requests than the ones produced by its originally-associated
4984 * process.
4985 */
4986static struct bfq_queue *
4987bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
4988 void *io_struct, bool request)
4989{
4990 struct bfq_queue *in_service_bfqq, *new_bfqq;
4991
4992 if (bfqq->new_bfqq)
4993 return bfqq->new_bfqq;
4994
4995 if (!io_struct ||
4996 wr_from_too_long(bfqq) ||
4997 unlikely(bfqq == &bfqd->oom_bfqq))
4998 return NULL;
4999
5000 /* If there is only one backlogged queue, don't search. */
5001 if (bfqd->busy_queues == 1)
5002 return NULL;
5003
5004 in_service_bfqq = bfqd->in_service_queue;
5005
5006 if (!in_service_bfqq || in_service_bfqq == bfqq ||
5007 !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) ||
5008 unlikely(in_service_bfqq == &bfqd->oom_bfqq))
5009 goto check_scheduled;
5010
5011 if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
5012 bfqq->entity.parent == in_service_bfqq->entity.parent &&
5013 bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
5014 new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
5015 if (new_bfqq)
5016 return new_bfqq;
5017 }
5018 /*
5019 * Check whether there is a cooperator among currently scheduled
5020 * queues. The only thing we need is that the bio/request is not
5021 * NULL, as we need it to establish whether a cooperator exists.
5022 */
5023check_scheduled:
5024 new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
5025 bfq_io_struct_pos(io_struct, request));
5026
5027 if (new_bfqq && !wr_from_too_long(new_bfqq) &&
5028 likely(new_bfqq != &bfqd->oom_bfqq) &&
5029 bfq_may_be_close_cooperator(bfqq, new_bfqq))
5030 return bfq_setup_merge(bfqq, new_bfqq);
5031
5032 return NULL;
5033}
5034
5035static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
5036{
5037 struct bfq_io_cq *bic = bfqq->bic;
5038
5039 /*
5040 * If !bfqq->bic, the queue is already shared or its requests
5041 * have already been redirected to a shared queue; both idle window
5042 * and weight raising state have already been saved. Do nothing.
5043 */
5044 if (!bic)
5045 return;
5046
5047 bic->saved_ttime = bfqq->ttime;
5048 bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
5049 bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
5050 bic->saved_wr_coeff = bfqq->wr_coeff;
5051 bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
5052 bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
5053 bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
5054}
5055
5056static void bfq_get_bic_reference(struct bfq_queue *bfqq)
5057{
5058 /*
5059 * If bfqq->bic has a non-NULL value, the bic to which it belongs
5060 * is about to begin using a shared bfq_queue.
5061 */
5062 if (bfqq->bic)
5063 atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
5064}
5065
5066static void
5067bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
5068 struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
5069{
5070 bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
5071 (unsigned long)new_bfqq->pid);
5072 /* Save weight raising and idle window of the merged queues */
5073 bfq_bfqq_save_state(bfqq);
5074 bfq_bfqq_save_state(new_bfqq);
5075 if (bfq_bfqq_IO_bound(bfqq))
5076 bfq_mark_bfqq_IO_bound(new_bfqq);
5077 bfq_clear_bfqq_IO_bound(bfqq);
5078
5079 /*
5080 * If bfqq is weight-raised, then let new_bfqq inherit
5081 * weight-raising. To reduce false positives, neglect the case
5082 * where bfqq has just been created, but has not yet made it
5083 * to be weight-raised (which may happen because EQM may merge
5084 * bfqq even before bfq_add_request is executed for the first
5085 * time for bfqq).
5086 */
5087 if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
5088 new_bfqq->wr_coeff = bfqq->wr_coeff;
5089 new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
5090 new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
5091 new_bfqq->wr_start_at_switch_to_srt =
5092 bfqq->wr_start_at_switch_to_srt;
5093 if (bfq_bfqq_busy(new_bfqq))
5094 bfqd->wr_busy_queues++;
5095 new_bfqq->entity.prio_changed = 1;
5096 }
5097
5098 if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
5099 bfqq->wr_coeff = 1;
5100 bfqq->entity.prio_changed = 1;
5101 if (bfq_bfqq_busy(bfqq))
5102 bfqd->wr_busy_queues--;
5103 }
5104
5105 bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d",
5106 bfqd->wr_busy_queues);
5107
5108 /*
5109 * Grab a reference to the bic, to prevent it from being destroyed
5110 * before being possibly touched by a bfq_split_bfqq().
5111 */
5112 bfq_get_bic_reference(bfqq);
5113 bfq_get_bic_reference(new_bfqq);
5114 /*
5115 * Merge queues (that is, let bic redirect its requests to new_bfqq)
5116 */
5117 bic_set_bfqq(bic, new_bfqq, 1);
5118 bfq_mark_bfqq_coop(new_bfqq);
5119 /*
5120 * new_bfqq now belongs to at least two bics (it is a shared queue):
5121 * set new_bfqq->bic to NULL. bfqq either:
5122 * - does not belong to any bic any more, and hence bfqq->bic must
5123 * be set to NULL, or
5124 * - is a queue whose owning bics have already been redirected to a
5125 * different queue, hence the queue is destined to not belong to
5126 * any bic soon and bfqq->bic is already NULL (therefore the next
5127 * assignment causes no harm).
5128 */
5129 new_bfqq->bic = NULL;
5130 bfqq->bic = NULL;
5131 /* release process reference to bfqq */
5132 bfq_put_queue(bfqq);
5133}
5134
4535static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, 5135static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
4536 struct bio *bio) 5136 struct bio *bio)
4537{ 5137{
4538 struct bfq_data *bfqd = q->elevator->elevator_data; 5138 struct bfq_data *bfqd = q->elevator->elevator_data;
4539 bool is_sync = op_is_sync(bio->bi_opf); 5139 bool is_sync = op_is_sync(bio->bi_opf);
4540 struct bfq_queue *bfqq = bfqd->bio_bfqq; 5140 struct bfq_queue *bfqq = bfqd->bio_bfqq, *new_bfqq;
4541 5141
4542 /* 5142 /*
4543 * Disallow merge of a sync bio into an async request. 5143 * Disallow merge of a sync bio into an async request.
@@ -4552,6 +5152,37 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
4552 if (!bfqq) 5152 if (!bfqq)
4553 return false; 5153 return false;
4554 5154
5155 /*
5156 * We take advantage of this function to perform an early merge
5157 * of the queues of possible cooperating processes.
5158 */
5159 new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
5160 if (new_bfqq) {
5161 /*
 5162		 * bic still points to bfqq, so it has not yet been
 5163		 * redirected to some other bfq_queue, and a queue
 5164		 * merge between bfqq and new_bfqq can be safely
 5165		 * fulfilled, i.e., bic can be redirected to new_bfqq
5166 * and bfqq can be put.
5167 */
5168 bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
5169 new_bfqq);
5170 /*
 5171		 * If we get here, bio will be queued into new_bfqq,
5172 * so use new_bfqq to decide whether bio and rq can be
5173 * merged.
5174 */
5175 bfqq = new_bfqq;
5176
5177 /*
 5178		 * Change also bfqd->bio_bfqq, as the bic pointed to
 5179		 * by bfqd->bio_bic now refers to new_bfqq, and
 5180		 * this function may be invoked again (and then may
 5181		 * use bfqd->bio_bfqq again).
5182 */
5183 bfqd->bio_bfqq = bfqq;
5184 }
5185
4555 return bfqq == RQ_BFQQ(rq); 5186 return bfqq == RQ_BFQQ(rq);
4556} 5187}
4557 5188
@@ -4959,6 +5590,15 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
4959 5590
4960static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) 5591static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
4961{ 5592{
5593 /*
5594 * If this bfqq is shared between multiple processes, check
5595 * to make sure that those processes are still issuing I/Os
5596 * within the mean seek distance. If not, it may be time to
5597 * break the queues apart again.
5598 */
5599 if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
5600 bfq_mark_bfqq_split_coop(bfqq);
5601
4962 if (RB_EMPTY_ROOT(&bfqq->sort_list)) { 5602 if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
4963 if (bfqq->dispatched == 0) 5603 if (bfqq->dispatched == 0)
4964 /* 5604 /*
@@ -4970,8 +5610,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
4970 bfqq->budget_timeout = jiffies; 5610 bfqq->budget_timeout = jiffies;
4971 5611
4972 bfq_del_bfqq_busy(bfqd, bfqq, true); 5612 bfq_del_bfqq_busy(bfqd, bfqq, true);
4973 } else 5613 } else {
4974 bfq_requeue_bfqq(bfqd, bfqq); 5614 bfq_requeue_bfqq(bfqd, bfqq);
5615 /*
5616 * Resort priority tree of potential close cooperators.
5617 */
5618 bfq_pos_tree_add_move(bfqd, bfqq);
5619 }
4975 5620
4976 /* 5621 /*
4977 * All in-service entities must have been properly deactivated 5622 * All in-service entities must have been properly deactivated
@@ -5792,8 +6437,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
5792 6437
5793 /* 6438 /*
5794 * If too much time has elapsed from the beginning of 6439 * If too much time has elapsed from the beginning of
5795 * this weight-raising period, then end weight 6440 * this weight-raising period, then end weight raising.
5796 * raising.
5797 */ 6441 */
5798 if (time_is_before_jiffies(bfqq->last_wr_start_finish + 6442 if (time_is_before_jiffies(bfqq->last_wr_start_finish +
5799 bfqq->wr_cur_max_time)) { 6443 bfqq->wr_cur_max_time)) {
@@ -5969,8 +6613,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
5969 struct request *rq; 6613 struct request *rq;
5970 6614
5971 spin_lock_irq(&bfqd->lock); 6615 spin_lock_irq(&bfqd->lock);
6616
5972 rq = __bfq_dispatch_request(hctx); 6617 rq = __bfq_dispatch_request(hctx);
5973 spin_unlock_irq(&bfqd->lock); 6618 bfq_unlock_put_ioc(bfqd);
5974 6619
5975 return rq; 6620 return rq;
5976} 6621}
@@ -6004,6 +6649,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
6004#endif 6649#endif
6005} 6650}
6006 6651
6652static void bfq_put_cooperator(struct bfq_queue *bfqq)
6653{
6654 struct bfq_queue *__bfqq, *next;
6655
6656 /*
6657 * If this queue was scheduled to merge with another queue, be
6658 * sure to drop the reference taken on that queue (and others in
6659 * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
6660 */
6661 __bfqq = bfqq->new_bfqq;
6662 while (__bfqq) {
6663 if (__bfqq == bfqq)
6664 break;
6665 next = __bfqq->new_bfqq;
6666 bfq_put_queue(__bfqq);
6667 __bfqq = next;
6668 }
6669}
6670
6007static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) 6671static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
6008{ 6672{
6009 if (bfqq == bfqd->in_service_queue) { 6673 if (bfqq == bfqd->in_service_queue) {
@@ -6013,6 +6677,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
6013 6677
6014 bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); 6678 bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
6015 6679
6680 bfq_put_cooperator(bfqq);
6681
6016 bfq_put_queue(bfqq); /* release process reference */ 6682 bfq_put_queue(bfqq); /* release process reference */
6017} 6683}
6018 6684
@@ -6028,9 +6694,20 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
6028 unsigned long flags; 6694 unsigned long flags;
6029 6695
6030 spin_lock_irqsave(&bfqd->lock, flags); 6696 spin_lock_irqsave(&bfqd->lock, flags);
6697 /*
6698 * If the bic is using a shared queue, put the
6699 * reference taken on the io_context when the bic
6700 * started using a shared bfq_queue. This put cannot
 6701		 * make ioc->ref_count reach 0, so no ioc->lock
 6702		 * risks being taken here (which could lead to
 6703		 * deadlock scenarios).
6704 */
6705 if (is_sync && bfq_bfqq_coop(bfqq))
6706 put_io_context(bic->icq.ioc);
6707
6031 bfq_exit_bfqq(bfqd, bfqq); 6708 bfq_exit_bfqq(bfqd, bfqq);
6032 bic_set_bfqq(bic, NULL, is_sync); 6709 bic_set_bfqq(bic, NULL, is_sync);
6033 spin_unlock_irq(&bfqd->lock); 6710 bfq_unlock_put_ioc_restore(bfqd, flags);
6034 } 6711 }
6035} 6712}
6036 6713
@@ -6152,8 +6829,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
6152 bfqq->budget_timeout = bfq_smallest_from_now(); 6829 bfqq->budget_timeout = bfq_smallest_from_now();
6153 6830
6154 bfqq->wr_coeff = 1; 6831 bfqq->wr_coeff = 1;
6155 bfqq->last_wr_start_finish = bfq_smallest_from_now(); 6832 bfqq->last_wr_start_finish = jiffies;
6156 bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now(); 6833 bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now();
6834 bfqq->split_time = bfq_smallest_from_now();
6157 6835
6158 /* 6836 /*
6159 * Set to the value for which bfqq will not be deemed as 6837 * Set to the value for which bfqq will not be deemed as
@@ -6288,6 +6966,11 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
6288 if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) 6966 if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
6289 return; 6967 return;
6290 6968
 6969	/* Idle window just restored: statistics are meaningless. */
6970 if (time_is_after_eq_jiffies(bfqq->split_time +
6971 bfqd->bfq_wr_min_idle_time))
6972 return;
6973
6291 enable_idle = bfq_bfqq_idle_window(bfqq); 6974 enable_idle = bfq_bfqq_idle_window(bfqq);
6292 6975
6293 if (atomic_read(&bic->icq.ioc->active_ref) == 0 || 6976 if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
@@ -6383,7 +7066,38 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
6383 7066
6384static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) 7067static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
6385{ 7068{
6386 struct bfq_queue *bfqq = RQ_BFQQ(rq); 7069 struct bfq_queue *bfqq = RQ_BFQQ(rq),
7070 *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
7071
7072 if (new_bfqq) {
7073 if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
7074 new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
7075 /*
7076 * Release the request's reference to the old bfqq
 7077		 * and make sure one is taken on the shared queue.
7078 */
7079 new_bfqq->allocated++;
7080 bfqq->allocated--;
7081 new_bfqq->ref++;
7082 /*
7083 * If the bic associated with the process
7084 * issuing this request still points to bfqq
7085 * (and thus has not been already redirected
7086 * to new_bfqq or even some other bfq_queue),
7087 * then complete the merge and redirect it to
7088 * new_bfqq.
7089 */
7090 if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
7091 bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
7092 bfqq, new_bfqq);
7093 /*
7094 * rq is about to be enqueued into new_bfqq,
7095 * release rq reference on bfqq
7096 */
7097 bfq_put_queue(bfqq);
7098 rq->elv.priv[1] = new_bfqq;
7099 bfqq = new_bfqq;
7100 }
6387 7101
6388 bfq_add_request(rq); 7102 bfq_add_request(rq);
6389 7103
@@ -6425,7 +7139,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
6425 } 7139 }
6426 } 7140 }
6427 7141
6428 spin_unlock_irq(&bfqd->lock); 7142 bfq_unlock_put_ioc(bfqd);
6429} 7143}
6430 7144
6431static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, 7145static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
@@ -6576,7 +7290,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
6576 bfq_completed_request(bfqq, bfqd); 7290 bfq_completed_request(bfqq, bfqd);
6577 bfq_put_rq_priv_body(bfqq); 7291 bfq_put_rq_priv_body(bfqq);
6578 7292
6579 spin_unlock_irqrestore(&bfqd->lock, flags); 7293 bfq_unlock_put_ioc_restore(bfqd, flags);
6580 } else { 7294 } else {
6581 /* 7295 /*
6582 * Request rq may be still/already in the scheduler, 7296 * Request rq may be still/already in the scheduler,
@@ -6600,6 +7314,55 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
6600} 7314}
6601 7315
6602/* 7316/*
7317 * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
7318 * was the last process referring to that bfqq.
7319 */
7320static struct bfq_queue *
7321bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
7322{
7323 bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
7324
7325 if (bfqq_process_refs(bfqq) == 1) {
7326 bfqq->pid = current->pid;
7327 bfq_clear_bfqq_coop(bfqq);
7328 bfq_clear_bfqq_split_coop(bfqq);
7329 return bfqq;
7330 }
7331
7332 bic_set_bfqq(bic, NULL, 1);
7333
7334 bfq_put_cooperator(bfqq);
7335
7336 bfq_put_queue(bfqq);
7337 return NULL;
7338}
7339
7340static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
7341 struct bfq_io_cq *bic,
7342 struct bio *bio,
7343 bool split, bool is_sync,
7344 bool *new_queue)
7345{
7346 struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
7347
7348 if (likely(bfqq && bfqq != &bfqd->oom_bfqq))
7349 return bfqq;
7350
7351 if (new_queue)
7352 *new_queue = true;
7353
7354 if (bfqq)
7355 bfq_put_queue(bfqq);
7356 bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
7357
7358 bic_set_bfqq(bic, bfqq, is_sync);
7359 if (split && is_sync)
7360 bfqq->split_time = jiffies;
7361
7362 return bfqq;
7363}
7364
7365/*
6603 * Allocate bfq data structures associated with this request. 7366 * Allocate bfq data structures associated with this request.
6604 */ 7367 */
6605static int bfq_get_rq_private(struct request_queue *q, struct request *rq, 7368static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
@@ -6609,6 +7372,7 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
6609 struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); 7372 struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
6610 const int is_sync = rq_is_sync(rq); 7373 const int is_sync = rq_is_sync(rq);
6611 struct bfq_queue *bfqq; 7374 struct bfq_queue *bfqq;
7375 bool new_queue = false;
6612 7376
6613 spin_lock_irq(&bfqd->lock); 7377 spin_lock_irq(&bfqd->lock);
6614 7378
@@ -6619,12 +7383,28 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
6619 7383
6620 bfq_bic_update_cgroup(bic, bio); 7384 bfq_bic_update_cgroup(bic, bio);
6621 7385
6622 bfqq = bic_to_bfqq(bic, is_sync); 7386 bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync,
6623 if (!bfqq || bfqq == &bfqd->oom_bfqq) { 7387 &new_queue);
6624 if (bfqq) 7388
6625 bfq_put_queue(bfqq); 7389 if (likely(!new_queue)) {
6626 bfqq = bfq_get_queue(bfqd, bio, is_sync, bic); 7390 /* If the queue was seeky for too long, break it apart. */
6627 bic_set_bfqq(bic, bfqq, is_sync); 7391 if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
7392 bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
7393 bfqq = bfq_split_bfqq(bic, bfqq);
7394 /*
7395 * A reference to bic->icq.ioc needs to be
7396 * released after a queue split. Do not do it
7397 * immediately, to not risk to possibly take
7398 * an ioc->lock while holding the scheduler
7399 * lock.
7400 */
7401 bfqd->ioc_to_put = bic->icq.ioc;
7402
7403 if (!bfqq)
7404 bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
7405 true, is_sync,
7406 NULL);
7407 }
6628 } 7408 }
6629 7409
6630 bfqq->allocated++; 7410 bfqq->allocated++;
@@ -6635,7 +7415,25 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
6635 rq->elv.priv[0] = bic; 7415 rq->elv.priv[0] = bic;
6636 rq->elv.priv[1] = bfqq; 7416 rq->elv.priv[1] = bfqq;
6637 7417
6638 spin_unlock_irq(&bfqd->lock); 7418 /*
7419 * If a bfq_queue has only one process reference, it is owned
7420 * by only this bic: we can then set bfqq->bic = bic. in
7421 * addition, if the queue has also just been split, we have to
7422 * resume its state.
7423 */
7424 if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
7425 bfqq->bic = bic;
7426 if (bfqd->ioc_to_put) { /* if true, there has been a split */
7427 /*
7428 * The queue has just been split from a shared
7429 * queue: restore the idle window and the
7430 * possible weight raising period.
7431 */
7432 bfq_bfqq_resume_state(bfqq, bic);
7433 }
7434 }
7435
7436 bfq_unlock_put_ioc(bfqd);
6639 7437
6640 return 0; 7438 return 0;
6641 7439
@@ -6680,7 +7478,7 @@ static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
6680 bfq_bfqq_expire(bfqd, bfqq, true, reason); 7478 bfq_bfqq_expire(bfqd, bfqq, true, reason);
6681 7479
6682schedule_dispatch: 7480schedule_dispatch:
6683 spin_unlock_irqrestore(&bfqd->lock, flags); 7481 bfq_unlock_put_ioc_restore(bfqd, flags);
6684 bfq_schedule_dispatch(bfqd); 7482 bfq_schedule_dispatch(bfqd);
6685} 7483}
6686 7484
@@ -6777,6 +7575,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
6777 root_group->my_entity = NULL; 7575 root_group->my_entity = NULL;
6778 root_group->bfqd = bfqd; 7576 root_group->bfqd = bfqd;
6779#endif 7577#endif
7578 root_group->rq_pos_tree = RB_ROOT;
6780 for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) 7579 for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
6781 root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; 7580 root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
6782 root_group->sched_data.bfq_class_idle_last_service = jiffies; 7581 root_group->sched_data.bfq_class_idle_last_service = jiffies;