diff options
-rw-r--r-- | block/bfq-iosched.c | 76 | ||||
-rw-r--r-- | block/bfq-iosched.h | 51 | ||||
-rw-r--r-- | block/bfq-wf2q.c | 5 |
3 files changed, 107 insertions, 25 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3a27d31fcda6..97337214bec4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c | |||
@@ -638,7 +638,7 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) | |||
638 | bfqd->queue_weights_tree.rb_node->rb_right) | 638 | bfqd->queue_weights_tree.rb_node->rb_right) |
639 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 639 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
640 | ) || | 640 | ) || |
641 | (bfqd->num_active_groups > 0 | 641 | (bfqd->num_groups_with_pending_reqs > 0 |
642 | #endif | 642 | #endif |
643 | ); | 643 | ); |
644 | } | 644 | } |
@@ -802,7 +802,21 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, | |||
802 | */ | 802 | */ |
803 | break; | 803 | break; |
804 | } | 804 | } |
805 | bfqd->num_active_groups--; | 805 | |
806 | /* | ||
807 | * The decrement of num_groups_with_pending_reqs is | ||
808 | * not performed immediately upon the deactivation of | ||
809 | * entity, but it is delayed to when it also happens | ||
810 | * that the first leaf descendant bfqq of entity gets | ||
811 | * all its pending requests completed. The following | ||
812 | * instructions perform this delayed decrement, if | ||
813 | * needed. See the comments on | ||
814 | * num_groups_with_pending_reqs for details. | ||
815 | */ | ||
816 | if (entity->in_groups_with_pending_reqs) { | ||
817 | entity->in_groups_with_pending_reqs = false; | ||
818 | bfqd->num_groups_with_pending_reqs--; | ||
819 | } | ||
806 | } | 820 | } |
807 | } | 821 | } |
808 | 822 | ||
@@ -3529,27 +3543,44 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) | |||
3529 | * fact, if there are active groups, then, for condition (i) | 3543 | * fact, if there are active groups, then, for condition (i) |
3530 | * to become false, it is enough that an active group contains | 3544 | * to become false, it is enough that an active group contains |
3531 | * more active processes or sub-groups than some other active | 3545 | * more active processes or sub-groups than some other active |
3532 | * group. We address this issue with the following bi-modal | 3546 | * group. More precisely, for condition (i) to hold because of |
3533 | * behavior, implemented in the function | 3547 | * such a group, it is not even necessary that the group is |
3548 | * (still) active: it is sufficient that, even if the group | ||
3549 | * has become inactive, some of its descendant processes still | ||
3550 | * have some request already dispatched but still waiting for | ||
3551 | * completion. In fact, requests have still to be guaranteed | ||
3552 | * their share of the throughput even after being | ||
3553 | * dispatched. In this respect, it is easy to show that, if a | ||
3554 | * group frequently becomes inactive while still having | ||
3555 | * in-flight requests, and if, when this happens, the group is | ||
3556 | * not considered in the calculation of whether the scenario | ||
3557 | * is asymmetric, then the group may fail to be guaranteed its | ||
3558 | * fair share of the throughput (basically because idling may | ||
3559 | * not be performed for the descendant processes of the group, | ||
3560 | * but it had to be). We address this issue with the | ||
3561 | * following bi-modal behavior, implemented in the function | ||
3534 | * bfq_symmetric_scenario(). | 3562 | * bfq_symmetric_scenario(). |
3535 | * | 3563 | * |
3536 | * If there are active groups, then the scenario is tagged as | 3564 | * If there are groups with requests waiting for completion |
3565 | * (as commented above, some of these groups may even be | ||
3566 | * already inactive), then the scenario is tagged as | ||
3537 | * asymmetric, conservatively, without checking any of the | 3567 | * asymmetric, conservatively, without checking any of the |
3538 | * conditions (i) and (ii). So the device is idled for bfqq. | 3568 | * conditions (i) and (ii). So the device is idled for bfqq. |
3539 | * This behavior matches also the fact that groups are created | 3569 | * This behavior matches also the fact that groups are created |
3540 | * exactly if controlling I/O (to preserve bandwidth and | 3570 | * exactly if controlling I/O is a primary concern (to |
3541 | * latency guarantees) is a primary concern. | 3571 | * preserve bandwidth and latency guarantees). |
3542 | * | 3572 | * |
3543 | * On the opposite end, if there are no active groups, then | 3573 | * On the opposite end, if there are no groups with requests |
3544 | * only condition (i) is actually controlled, i.e., provided | 3574 | * waiting for completion, then only condition (i) is actually |
3545 | * that condition (i) holds, idling is not performed, | 3575 | * controlled, i.e., provided that condition (i) holds, idling |
3546 | * regardless of whether condition (ii) holds. In other words, | 3576 | * is not performed, regardless of whether condition (ii) |
3547 | * only if condition (i) does not hold, then idling is | 3577 | * holds. In other words, only if condition (i) does not hold, |
3548 | * allowed, and the device tends to be prevented from queueing | 3578 | * then idling is allowed, and the device tends to be |
3549 | * many requests, possibly of several processes. Since there | 3579 | * prevented from queueing many requests, possibly of several |
3550 | * are no active groups, then, to control condition (i) it is | 3580 | * processes. Since there are no groups with requests waiting |
3551 | * enough to check whether all active queues have the same | 3581 | * for completion, then, to control condition (i) it is enough |
3552 | * weight. | 3582 | * to check just whether all the queues with requests waiting |
3583 | * for completion also have the same weight. | ||
3553 | * | 3584 | * |
3554 | * Not checking condition (ii) evidently exposes bfqq to the | 3585 | * Not checking condition (ii) evidently exposes bfqq to the |
3555 | * risk of getting less throughput than its fair share. | 3586 | * risk of getting less throughput than its fair share. |
@@ -3607,10 +3638,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) | |||
3607 | * bfqq is weight-raised is checked explicitly here. More | 3638 | * bfqq is weight-raised is checked explicitly here. More |
3608 | * precisely, the compound condition below takes into account | 3639 | * precisely, the compound condition below takes into account |
3609 | * also the fact that, even if bfqq is being weight-raised, | 3640 | * also the fact that, even if bfqq is being weight-raised, |
3610 | * the scenario is still symmetric if all active queues happen | 3641 | * the scenario is still symmetric if all queues with requests |
3611 | * to be weight-raised. Actually, we should be even more | 3642 | * waiting for completion happen to be |
3612 | * precise here, and differentiate between interactive weight | 3643 | * weight-raised. Actually, we should be even more precise |
3613 | * raising and soft real-time weight raising. | 3644 | * here, and differentiate between interactive weight raising |
3645 | * and soft real-time weight raising. | ||
3614 | * | 3646 | * |
3615 | * As a side note, it is worth considering that the above | 3647 | * As a side note, it is worth considering that the above |
3616 | * device-idling countermeasures may however fail in the | 3648 | * device-idling countermeasures may however fail in the |
@@ -5417,7 +5449,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) | |||
5417 | bfqd->idle_slice_timer.function = bfq_idle_slice_timer; | 5449 | bfqd->idle_slice_timer.function = bfq_idle_slice_timer; |
5418 | 5450 | ||
5419 | bfqd->queue_weights_tree = RB_ROOT; | 5451 | bfqd->queue_weights_tree = RB_ROOT; |
5420 | bfqd->num_active_groups = 0; | 5452 | bfqd->num_groups_with_pending_reqs = 0; |
5421 | 5453 | ||
5422 | INIT_LIST_HEAD(&bfqd->active_list); | 5454 | INIT_LIST_HEAD(&bfqd->active_list); |
5423 | INIT_LIST_HEAD(&bfqd->idle_list); | 5455 | INIT_LIST_HEAD(&bfqd->idle_list); |
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 77651d817ecd..0b02bf302de0 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h | |||
@@ -196,6 +196,9 @@ struct bfq_entity { | |||
196 | 196 | ||
197 | /* flag, set to request a weight, ioprio or ioprio_class change */ | 197 | /* flag, set to request a weight, ioprio or ioprio_class change */ |
198 | int prio_changed; | 198 | int prio_changed; |
199 | |||
200 | /* flag, set if the entity is counted in groups_with_pending_reqs */ | ||
201 | bool in_groups_with_pending_reqs; | ||
199 | }; | 202 | }; |
200 | 203 | ||
201 | struct bfq_group; | 204 | struct bfq_group; |
@@ -448,10 +451,54 @@ struct bfq_data { | |||
448 | * bfq_weights_tree_[add|remove] for further details). | 451 | * bfq_weights_tree_[add|remove] for further details). |
449 | */ | 452 | */ |
450 | struct rb_root queue_weights_tree; | 453 | struct rb_root queue_weights_tree; |
454 | |||
451 | /* | 455 | /* |
452 | * number of groups with requests still waiting for completion | 456 | * Number of groups with at least one descendant process that |
457 | * has at least one request waiting for completion. Note that | ||
458 | * this accounts for also requests already dispatched, but not | ||
459 | * yet completed. Therefore this number of groups may differ | ||
460 | * (be larger) than the number of active groups, as a group is | ||
461 | * considered active only if its corresponding entity has | ||
462 | * descendant queues with at least one request queued. This | ||
463 | * number is used to decide whether a scenario is symmetric. | ||
464 | * For a detailed explanation see comments on the computation | ||
465 | * of the variable asymmetric_scenario in the function | ||
466 | * bfq_better_to_idle(). | ||
467 | * | ||
468 | * However, it is hard to compute this number exactly, for | ||
469 | * groups with multiple descendant processes. Consider a group | ||
470 | * that is inactive, i.e., that has no descendant process with | ||
471 | * pending I/O inside BFQ queues. Then suppose that | ||
472 | * num_groups_with_pending_reqs is still accounting for this | ||
473 | * group, because the group has descendant processes with some | ||
474 | * I/O request still in flight. num_groups_with_pending_reqs | ||
475 | * should be decremented when the in-flight request of the | ||
476 | * last descendant process is finally completed (assuming that | ||
477 | * nothing else has changed for the group in the meantime, in | ||
478 | * terms of composition of the group and active/inactive state of child | ||
479 | * groups and processes). To accomplish this, an additional | ||
480 | * pending-request counter must be added to entities, and must | ||
481 | * be updated correctly. To avoid this additional field and operations, | ||
482 | * we resort to the following tradeoff between simplicity and | ||
483 | * accuracy: for an inactive group that is still counted in | ||
484 | * num_groups_with_pending_reqs, we decrement | ||
485 | * num_groups_with_pending_reqs when the first descendant | ||
486 | * process of the group remains with no request waiting for | ||
487 | * completion. | ||
488 | * | ||
489 | * Even this simpler decrement strategy requires a little | ||
490 | * carefulness: to avoid multiple decrements, we flag a group, | ||
491 | * more precisely an entity representing a group, as still | ||
492 | * counted in num_groups_with_pending_reqs when it becomes | ||
493 | * inactive. Then, when the first descendant queue of the | ||
494 | * entity remains with no request waiting for completion, | ||
495 | * num_groups_with_pending_reqs is decremented, and this flag | ||
496 | * is reset. After this flag is reset for the entity, | ||
497 | * num_groups_with_pending_reqs won't be decremented any | ||
498 | * longer in case a new descendant queue of the entity remains | ||
499 | * with no request waiting for completion. | ||
453 | */ | 500 | */ |
454 | unsigned int num_active_groups; | 501 | unsigned int num_groups_with_pending_reqs; |
455 | 502 | ||
456 | /* | 503 | /* |
457 | * Number of bfq_queues containing requests (including the | 504 | * Number of bfq_queues containing requests (including the |
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 4b0d5fb69160..63e0f12be7c9 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c | |||
@@ -1012,7 +1012,10 @@ static void __bfq_activate_entity(struct bfq_entity *entity, | |||
1012 | container_of(entity, struct bfq_group, entity); | 1012 | container_of(entity, struct bfq_group, entity); |
1013 | struct bfq_data *bfqd = bfqg->bfqd; | 1013 | struct bfq_data *bfqd = bfqg->bfqd; |
1014 | 1014 | ||
1015 | bfqd->num_active_groups++; | 1015 | if (!entity->in_groups_with_pending_reqs) { |
1016 | entity->in_groups_with_pending_reqs = true; | ||
1017 | bfqd->num_groups_with_pending_reqs++; | ||
1018 | } | ||
1016 | } | 1019 | } |
1017 | #endif | 1020 | #endif |
1018 | 1021 | ||