aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--block/bfq-iosched.c76
-rw-r--r--block/bfq-iosched.h51
-rw-r--r--block/bfq-wf2q.c5
3 files changed, 107 insertions, 25 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3a27d31fcda6..97337214bec4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -638,7 +638,7 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
 		bfqd->queue_weights_tree.rb_node->rb_right)
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	       ) ||
-		(bfqd->num_active_groups > 0
+		(bfqd->num_groups_with_pending_reqs > 0
 #endif
 	       );
 }
@@ -802,7 +802,21 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
 		 */
 			break;
 		}
-		bfqd->num_active_groups--;
+
+		/*
+		 * The decrement of num_groups_with_pending_reqs is
+		 * not performed immediately upon the deactivation of
+		 * entity, but it is delayed to when it also happens
+		 * that the first leaf descendant bfqq of entity gets
+		 * all its pending requests completed. The following
+		 * instructions perform this delayed decrement, if
+		 * needed. See the comments on
+		 * num_groups_with_pending_reqs for details.
+		 */
+		if (entity->in_groups_with_pending_reqs) {
+			entity->in_groups_with_pending_reqs = false;
+			bfqd->num_groups_with_pending_reqs--;
+		}
 	}
 }
 
@@ -3529,27 +3543,44 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
 	 * fact, if there are active groups, then, for condition (i)
 	 * to become false, it is enough that an active group contains
 	 * more active processes or sub-groups than some other active
-	 * group. We address this issue with the following bi-modal
-	 * behavior, implemented in the function
+	 * group. More precisely, for condition (i) to hold because of
+	 * such a group, it is not even necessary that the group is
+	 * (still) active: it is sufficient that, even if the group
+	 * has become inactive, some of its descendant processes still
+	 * have some request already dispatched but still waiting for
+	 * completion. In fact, requests have still to be guaranteed
+	 * their share of the throughput even after being
+	 * dispatched. In this respect, it is easy to show that, if a
+	 * group frequently becomes inactive while still having
+	 * in-flight requests, and if, when this happens, the group is
+	 * not considered in the calculation of whether the scenario
+	 * is asymmetric, then the group may fail to be guaranteed its
+	 * fair share of the throughput (basically because idling may
+	 * not be performed for the descendant processes of the group,
+	 * but it had to be). We address this issue with the
+	 * following bi-modal behavior, implemented in the function
 	 * bfq_symmetric_scenario().
 	 *
-	 * If there are active groups, then the scenario is tagged as
+	 * If there are groups with requests waiting for completion
+	 * (as commented above, some of these groups may even be
+	 * already inactive), then the scenario is tagged as
 	 * asymmetric, conservatively, without checking any of the
 	 * conditions (i) and (ii). So the device is idled for bfqq.
 	 * This behavior matches also the fact that groups are created
-	 * exactly if controlling I/O (to preserve bandwidth and
-	 * latency guarantees) is a primary concern.
+	 * exactly if controlling I/O is a primary concern (to
+	 * preserve bandwidth and latency guarantees).
 	 *
-	 * On the opposite end, if there are no active groups, then
-	 * only condition (i) is actually controlled, i.e., provided
-	 * that condition (i) holds, idling is not performed,
-	 * regardless of whether condition (ii) holds. In other words,
-	 * only if condition (i) does not hold, then idling is
-	 * allowed, and the device tends to be prevented from queueing
-	 * many requests, possibly of several processes. Since there
-	 * are no active groups, then, to control condition (i) it is
-	 * enough to check whether all active queues have the same
-	 * weight.
+	 * On the opposite end, if there are no groups with requests
+	 * waiting for completion, then only condition (i) is actually
+	 * controlled, i.e., provided that condition (i) holds, idling
+	 * is not performed, regardless of whether condition (ii)
+	 * holds. In other words, only if condition (i) does not hold,
+	 * then idling is allowed, and the device tends to be
+	 * prevented from queueing many requests, possibly of several
+	 * processes. Since there are no groups with requests waiting
+	 * for completion, then, to control condition (i) it is enough
+	 * to check just whether all the queues with requests waiting
+	 * for completion also have the same weight.
 	 *
 	 * Not checking condition (ii) evidently exposes bfqq to the
 	 * risk of getting less throughput than its fair share.
@@ -3607,10 +3638,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
 	 * bfqq is weight-raised is checked explicitly here. More
 	 * precisely, the compound condition below takes into account
 	 * also the fact that, even if bfqq is being weight-raised,
-	 * the scenario is still symmetric if all active queues happen
-	 * to be weight-raised. Actually, we should be even more
-	 * precise here, and differentiate between interactive weight
-	 * raising and soft real-time weight raising.
+	 * the scenario is still symmetric if all queues with requests
+	 * waiting for completion happen to be
+	 * weight-raised. Actually, we should be even more precise
+	 * here, and differentiate between interactive weight raising
+	 * and soft real-time weight raising.
 	 *
 	 * As a side note, it is worth considering that the above
 	 * device-idling countermeasures may however fail in the
@@ -5417,7 +5449,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
 
 	bfqd->queue_weights_tree = RB_ROOT;
-	bfqd->num_active_groups = 0;
+	bfqd->num_groups_with_pending_reqs = 0;
 
 	INIT_LIST_HEAD(&bfqd->active_list);
 	INIT_LIST_HEAD(&bfqd->idle_list);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 77651d817ecd..0b02bf302de0 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -196,6 +196,9 @@ struct bfq_entity {
 
 	/* flag, set to request a weight, ioprio or ioprio_class change */
 	int prio_changed;
+
+	/* flag, set if the entity is counted in groups_with_pending_reqs */
+	bool in_groups_with_pending_reqs;
 };
 
 struct bfq_group;
@@ -448,10 +451,54 @@ struct bfq_data {
 	 * bfq_weights_tree_[add|remove] for further details).
 	 */
 	struct rb_root queue_weights_tree;
+
 	/*
-	 * number of groups with requests still waiting for completion
+	 * Number of groups with at least one descendant process that
+	 * has at least one request waiting for completion. Note that
+	 * this accounts for also requests already dispatched, but not
+	 * yet completed. Therefore this number of groups may differ
+	 * (be larger) than the number of active groups, as a group is
+	 * considered active only if its corresponding entity has
+	 * descendant queues with at least one request queued. This
+	 * number is used to decide whether a scenario is symmetric.
+	 * For a detailed explanation see comments on the computation
+	 * of the variable asymmetric_scenario in the function
+	 * bfq_better_to_idle().
+	 *
+	 * However, it is hard to compute this number exactly, for
+	 * groups with multiple descendant processes. Consider a group
+	 * that is inactive, i.e., that has no descendant process with
+	 * pending I/O inside BFQ queues. Then suppose that
+	 * num_groups_with_pending_reqs is still accounting for this
+	 * group, because the group has descendant processes with some
+	 * I/O request still in flight. num_groups_with_pending_reqs
+	 * should be decremented when the in-flight request of the
+	 * last descendant process is finally completed (assuming that
+	 * nothing else has changed for the group in the meantime, in
+	 * terms of composition of the group and active/inactive state of child
+	 * groups and processes). To accomplish this, an additional
+	 * pending-request counter must be added to entities, and must
+	 * be updated correctly. To avoid this additional field and operations,
+	 * we resort to the following tradeoff between simplicity and
+	 * accuracy: for an inactive group that is still counted in
+	 * num_groups_with_pending_reqs, we decrement
+	 * num_groups_with_pending_reqs when the first descendant
+	 * process of the group remains with no request waiting for
+	 * completion.
+	 *
+	 * Even this simpler decrement strategy requires a little
+	 * carefulness: to avoid multiple decrements, we flag a group,
+	 * more precisely an entity representing a group, as still
+	 * counted in num_groups_with_pending_reqs when it becomes
+	 * inactive. Then, when the first descendant queue of the
+	 * entity remains with no request waiting for completion,
+	 * num_groups_with_pending_reqs is decremented, and this flag
+	 * is reset. After this flag is reset for the entity,
+	 * num_groups_with_pending_reqs won't be decremented any
+	 * longer in case a new descendant queue of the entity remains
+	 * with no request waiting for completion.
 	 */
-	unsigned int num_active_groups;
+	unsigned int num_groups_with_pending_reqs;
 
 	/*
 	 * Number of bfq_queues containing requests (including the
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 4b0d5fb69160..63e0f12be7c9 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1012,7 +1012,10 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 			container_of(entity, struct bfq_group, entity);
 		struct bfq_data *bfqd = bfqg->bfqd;
 
-		bfqd->num_active_groups++;
+		if (!entity->in_groups_with_pending_reqs) {
+			entity->in_groups_with_pending_reqs = true;
+			bfqd->num_groups_with_pending_reqs++;
+		}
 	}
 #endif
 