author		Arianna Avanzini <avanzini.arianna@gmail.com>	2017-04-12 12:23:17 -0400
committer	Jens Axboe <axboe@fb.com>	2017-04-19 10:30:26 -0400
commit		1de0c4cd9ea65f99910ae0b77fce2cd1a8e5de01 (patch)
tree		de7568a1b6f30c53eb10016e98dae4d7233f5831 /block/bfq-iosched.c
parent		36eca894832351feed9072d0f97eb06fc9482ca4 (diff)
block, bfq: reduce idling only in symmetric scenarios
A seeky queue (i.e., a queue containing random requests) is assigned a
very small device-idling slice, for throughput issues. Unfortunately,
this behavior causes the following problem for the process associated
with a seeky queue: if the process, say P, performs sync I/O and has a
higher weight than some other processes doing I/O and associated with
non-seeky queues, then BFQ may fail to guarantee to P its reserved
share of the throughput. The reason is that idling is key for providing
service guarantees to processes doing sync I/O [1].

This commit addresses this issue by allowing the device-idling slice to
be reduced for a seeky queue only if the scenario happens to be
symmetric, i.e., if all the queues are to receive the same share of the
throughput.

[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
    Scheduler", Proceedings of the First Workshop on Mobile System
    Technologies (MST-2015), May 2015.
    http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf

Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Riccardo Pizzetti <riccardo.pizzetti@gmail.com>
Signed-off-by: Samuele Zecchini <samuele.zecchini92@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
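For intuition, the symmetry test the patch relies on reduces to checking
that at most one distinct weight is active among the non-weight-raised
queues (and likewise among the groups). A minimal userspace sketch of
that criterion, assuming a flat array stands in for the kernel's weights
rbtree; all names here are illustrative, not part of the patch:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct bfq_weight_counter: one slot per distinct
 * weight, with a count of the active entities sharing it. */
struct weight_counter {
	unsigned int weight;
	unsigned int num_active;
};

/* Weights are differentiated iff more than one distinct weight is
 * active, i.e., the counter "tree" holds two or more nodes. */
static bool differentiated_weights(const struct weight_counter *c,
				   int nr_counters)
{
	(void)c; /* only the node count matters for this check */
	return nr_counters > 1;
}

int main(void)
{
	struct weight_counter same[] = { { 100, 3 } };
	struct weight_counter mixed[] = { { 100, 2 }, { 400, 1 } };

	/* Three queues at weight 100: symmetric, idling may be cut. */
	printf("symmetric: %d\n", !differentiated_weights(same, 1));
	/* One queue at weight 400: it needs idling for its share. */
	printf("symmetric: %d\n", !differentiated_weights(mixed, 2));
	return 0;
}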
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--	block/bfq-iosched.c	287
1 file changed, 280 insertions(+), 7 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6e7388a1d220..b97801ff3de0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -183,6 +183,20 @@ struct bfq_sched_data {
 };
 
 /**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ *                             with a given weight.
+ */
+struct bfq_weight_counter {
+	unsigned int weight; /* weight of the entities this counter refers to */
+	unsigned int num_active; /* nr of active entities with this weight */
+	/*
+	 * Weights tree member (see bfq_data's @queue_weights_tree and
+	 * @group_weights_tree)
+	 */
+	struct rb_node weights_node;
+};
+
+/**
  * struct bfq_entity - schedulable entity.
  *
  * A bfq_entity is used to represent either a bfq_queue (leaf node in the
@@ -212,6 +226,8 @@ struct bfq_sched_data {
 struct bfq_entity {
 	/* service_tree member */
 	struct rb_node rb_node;
+	/* pointer to the weight counter associated with this entity */
+	struct bfq_weight_counter *weight_counter;
 
 	/*
 	 * Flag, true if the entity is on a tree (either the active or
@@ -456,6 +472,25 @@ struct bfq_data {
 	struct bfq_group *root_group;
 
 	/*
+	 * rbtree of weight counters of @bfq_queues, sorted by
+	 * weight. Used to keep track of whether all @bfq_queues have
+	 * the same weight. The tree contains one counter for each
+	 * distinct weight associated to some active and not
+	 * weight-raised @bfq_queue (see the comments to the functions
+	 * bfq_weights_tree_[add|remove] for further details).
+	 */
+	struct rb_root queue_weights_tree;
+	/*
+	 * rbtree of non-queue @bfq_entity weight counters, sorted by
+	 * weight. Used to keep track of whether all @bfq_groups have
+	 * the same weight. The tree contains one counter for each
+	 * distinct weight associated to some active @bfq_group (see
+	 * the comments to the functions bfq_weights_tree_[add|remove]
+	 * for further details).
+	 */
+	struct rb_root group_weights_tree;
+
+	/*
 	 * Number of bfq_queues containing requests (including the
 	 * queue in service, even if it is idling).
 	 */
@@ -791,6 +826,11 @@ struct bfq_group_data {
  * to avoid too many special cases during group creation/
  * migration.
  * @stats: stats for this bfqg.
+ * @active_entities: number of active entities belonging to the group;
+ *                   unused for the root group. Used to know whether there
+ *                   are groups with more than one active @bfq_entity
+ *                   (see the comments to the function
+ *                   bfq_bfqq_may_idle()).
  * @rq_pos_tree: rbtree sorted by next_request position, used when
  *               determining if two or more queues have interleaving
  *               requests (see bfq_find_close_cooperator()).
@@ -818,6 +858,8 @@ struct bfq_group {
 
 	struct bfq_entity *my_entity;
 
+	int active_entities;
+
 	struct rb_root rq_pos_tree;
 
 	struct bfqg_stats stats;
@@ -1254,12 +1296,27 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
  * a candidate for next service (i.e, a candidate entity to serve
  * after the in-service entity is expired). The function then returns
  * true.
+ *
+ * In contrast, the entity could still be a candidate for next service
+ * if it is not a queue, and has more than one child. In fact, even if
+ * one of its children is about to be set in service, other children
+ * may still be the next to serve. As a consequence, a non-queue
+ * entity is not a candidate for next-service only if it has only one
+ * child. And only if this condition holds, then the function returns
+ * true for a non-queue entity.
  */
 static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 {
+	struct bfq_group *bfqg;
+
 	if (bfq_entity_to_bfqq(entity))
 		return true;
 
+	bfqg = container_of(entity, struct bfq_group, entity);
+
+	if (bfqg->active_entities == 1)
+		return true;
+
 	return false;
 }
 
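Read in isolation, the hunk above encodes a simple candidacy rule: a
queue is spent as a next-in-service candidate once picked, while a
group is spent only when it has a single active child. A hedged
standalone restatement (illustrative types, not the kernel's):

#include <stdbool.h>

/* Illustrative stand-in for the queue/group distinction carried by
 * struct bfq_entity in the patch. */
struct entity {
	bool is_queue;       /* leaf (queue) vs. internal (group) node */
	int active_children; /* meaningful for groups only */
};

/* Mirrors the logic of bfq_no_longer_next_in_service(): a queue stops
 * being a candidate as soon as it is set in service; a group stops
 * only if no other child may still be served next. */
static bool no_longer_next_in_service(const struct entity *e)
{
	if (e->is_queue)
		return true;
	return e->active_children == 1;
}

int main(void)
{
	struct entity q = { true, 0 }, g = { false, 2 };

	return no_longer_next_in_service(&q) &&
	       !no_longer_next_in_service(&g) ? 0 : 1;
}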
@@ -1498,6 +1555,15 @@ up:
 	goto up;
 }
 
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root);
+
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root);
+
+
 /**
  * bfq_active_insert - insert an entity in the active tree of its
  *		group/device.
@@ -1536,6 +1602,13 @@ static void bfq_active_insert(struct bfq_service_tree *st,
 #endif
 	if (bfqq)
 		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	else /* bfq_group */
+		bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
+
+	if (bfqg != bfqd->root_group)
+		bfqg->active_entities++;
+#endif
 }
 
 /**
@@ -1631,6 +1704,14 @@ static void bfq_active_extract(struct bfq_service_tree *st,
 #endif
 	if (bfqq)
 		list_del(&bfqq->bfqq_list);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	else /* bfq_group */
+		bfq_weights_tree_remove(bfqd, entity,
+					&bfqd->group_weights_tree);
+
+	if (bfqg != bfqd->root_group)
+		bfqg->active_entities--;
+#endif
 }
 
 /**
@@ -1731,6 +1812,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 	unsigned int prev_weight, new_weight;
 	struct bfq_data *bfqd = NULL;
+	struct rb_root *root;
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	struct bfq_sched_data *sd;
 	struct bfq_group *bfqg;
@@ -1780,7 +1862,26 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 	prev_weight = entity->weight;
 	new_weight = entity->orig_weight *
 		(bfqq ? bfqq->wr_coeff : 1);
+	/*
+	 * If the weight of the entity changes, remove the entity
+	 * from its old weight counter (if there is a counter
+	 * associated with the entity), and add it to the counter
+	 * associated with its new weight.
+	 */
+	if (prev_weight != new_weight) {
+		root = bfqq ? &bfqd->queue_weights_tree :
+			      &bfqd->group_weights_tree;
+		bfq_weights_tree_remove(bfqd, entity, root);
+	}
 	entity->weight = new_weight;
+	/*
+	 * Add the entity to its weights tree only if it is
+	 * not associated with a weight-raised queue.
+	 */
+	if (prev_weight != new_weight &&
+	    (bfqq ? bfqq->wr_coeff == 1 : 1))
+		/* If we get here, root has been initialized. */
+		bfq_weights_tree_add(bfqd, entity, root);
 
 	new_st->wsum += entity->weight;
 
@@ -2606,6 +2707,10 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 
 	bfqd->busy_queues--;
 
+	if (!bfqq->dispatched)
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
+
 	if (bfqq->wr_coeff > 1)
 		bfqd->wr_busy_queues--;
 
@@ -2626,6 +2731,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	bfq_mark_bfqq_busy(bfqq);
 	bfqd->busy_queues++;
 
+	if (!bfqq->dispatched)
+		if (bfqq->wr_coeff == 1)
+			bfq_weights_tree_add(bfqd, &bfqq->entity,
+					     &bfqd->queue_weights_tree);
+
 	if (bfqq->wr_coeff > 1)
 		bfqd->wr_busy_queues++;
 }
@@ -3028,6 +3138,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
 	 * in bfq_init_queue()
 	 */
 	bfqg->bfqd = bfqd;
+	bfqg->active_entities = 0;
 	bfqg->rq_pos_tree = RB_ROOT;
 }
 
@@ -3916,6 +4027,158 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 }
 
 /*
+ * Tell whether there are active queues or groups with differentiated weights.
+ */
+static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+{
+	/*
+	 * For weights to differ, at least one of the trees must contain
+	 * at least two nodes.
+	 */
+	return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
+		(bfqd->queue_weights_tree.rb_node->rb_left ||
+		 bfqd->queue_weights_tree.rb_node->rb_right)
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	       ) ||
+	       (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
+		(bfqd->group_weights_tree.rb_node->rb_left ||
+		 bfqd->group_weights_tree.rb_node->rb_right)
+#endif
+	       );
+}
+
+/*
+ * The following function returns true if every queue must receive the
+ * same share of the throughput (this condition is used when deciding
+ * whether idling may be disabled, see the comments in the function
+ * bfq_bfqq_may_idle()).
+ *
+ * Such a scenario occurs when:
+ * 1) all active queues have the same weight,
+ * 2) all active groups at the same level in the groups tree have the same
+ *    weight,
+ * 3) all active groups at the same level in the groups tree have the same
+ *    number of children.
+ *
+ * Unfortunately, keeping the necessary state for evaluating exactly the
+ * above symmetry conditions would be quite complex and time-consuming.
+ * Therefore this function evaluates, instead, the following stronger
+ * sub-conditions, for which it is much easier to maintain the needed
+ * state:
+ * 1) all active queues have the same weight,
+ * 2) all active groups have the same weight,
+ * 3) all active groups have at most one active child each.
+ * In particular, the last two conditions are always true if hierarchical
+ * support and the cgroups interface are not enabled, thus no state needs
+ * to be maintained in this case.
+ */
+static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+{
+	return !bfq_differentiated_weights(bfqd);
+}
+
+/*
+ * If the weight-counter tree passed as input contains no counter for
+ * the weight of the input entity, then add that counter; otherwise just
+ * increment the existing counter.
+ *
+ * Note that weight-counter trees contain few nodes in mostly symmetric
+ * scenarios. For example, if all queues have the same weight, then the
+ * weight-counter tree for the queues may contain at most one node.
+ * This holds even if low_latency is on, because weight-raised queues
+ * are not inserted in the tree.
+ * In most scenarios, the rate at which nodes are created/destroyed
+ * should be low too.
+ */
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/*
+	 * Do not insert if the entity is already associated with a
+	 * counter, which happens if:
+	 * 1) the entity is associated with a queue,
+	 * 2) a request arrival has caused the queue to become both
+	 *    non-weight-raised, and hence change its weight, and
+	 *    backlogged; in this respect, each of the two events
+	 *    causes an invocation of this function,
+	 * 3) this is the invocation of this function caused by the
+	 *    second event. This second invocation is actually useless,
+	 *    and we handle this fact by exiting immediately. More
+	 *    efficient or clearer solutions might possibly be adopted.
+	 */
+	if (entity->weight_counter)
+		return;
+
+	while (*new) {
+		struct bfq_weight_counter *__counter = container_of(*new,
+						struct bfq_weight_counter,
+						weights_node);
+		parent = *new;
+
+		if (entity->weight == __counter->weight) {
+			entity->weight_counter = __counter;
+			goto inc_counter;
+		}
+		if (entity->weight < __counter->weight)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
+					 GFP_ATOMIC);
+
+	/*
+	 * In the unlucky event of an allocation failure, we just
+	 * exit. This will cause the weight of entity to not be
+	 * considered in bfq_differentiated_weights, which, in its
+	 * turn, causes the scenario to be deemed wrongly symmetric in
+	 * case entity's weight would have been the only weight making
+	 * the scenario asymmetric. On the bright side, no unbalance
+	 * will however occur when entity becomes inactive again (the
+	 * invocation of this function is triggered by an activation
+	 * of entity). In fact, bfq_weights_tree_remove does nothing
+	 * if !entity->weight_counter.
+	 */
+	if (unlikely(!entity->weight_counter))
+		return;
+
+	entity->weight_counter->weight = entity->weight;
+	rb_link_node(&entity->weight_counter->weights_node, parent, new);
+	rb_insert_color(&entity->weight_counter->weights_node, root);
+
+inc_counter:
+	entity->weight_counter->num_active++;
+}
+
+/*
+ * Decrement the weight counter associated with the entity, and, if the
+ * counter reaches 0, remove the counter from the tree.
+ * See the comments to the function bfq_weights_tree_add() for considerations
+ * about overhead.
+ */
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root)
+{
+	if (!entity->weight_counter)
+		return;
+
+	entity->weight_counter->num_active--;
+	if (entity->weight_counter->num_active > 0)
+		goto reset_entity_pointer;
+
+	rb_erase(&entity->weight_counter->weights_node, root);
+	kfree(entity->weight_counter);
+
+reset_entity_pointer:
+	entity->weight_counter = NULL;
+}
+
+/*
  * Return expired entry, or NULL to just start from scratch in rbtree.
  */
 static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
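The bfq_weights_tree_[add|remove] pair above is, in essence,
reference-counted set membership keyed by weight. A compact userspace
model of that bookkeeping, assuming a linked list in place of the
rbtree (helper names are made up for illustration, and error handling
mirrors the patch's give-up-on-ENOMEM choice):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* One node per distinct active weight, as in struct bfq_weight_counter. */
struct wc {
	unsigned int weight;
	unsigned int num_active;
	struct wc *next;
};

/* Mirror of bfq_weights_tree_add(): find or create the counter for
 * this weight, then bump its active count. On allocation failure we
 * return NULL, as the patch silently skips the insertion; the scenario
 * may then be misjudged as symmetric, but the counts never go out of
 * balance, because the remove side ignores NULL counters. */
static struct wc *wc_add(struct wc **head, unsigned int weight)
{
	struct wc *c;

	for (c = *head; c; c = c->next)
		if (c->weight == weight)
			goto inc;

	c = calloc(1, sizeof(*c));
	if (!c)
		return NULL;
	c->weight = weight;
	c->next = *head;
	*head = c;
inc:
	c->num_active++;
	return c;
}

/* Mirror of bfq_weights_tree_remove(): drop one reference and free
 * the counter when the last entity with this weight goes inactive. */
static void wc_remove(struct wc **head, struct wc *c)
{
	struct wc **pp;

	if (!c || --c->num_active > 0)
		return;
	for (pp = head; *pp; pp = &(*pp)->next)
		if (*pp == c) {
			*pp = c->next;
			free(c);
			return;
		}
}

/* Differentiated weights <=> at least two counters are active. */
static bool differentiated(const struct wc *head)
{
	return head && head->next;
}

int main(void)
{
	struct wc *tree = NULL;
	struct wc *a = wc_add(&tree, 100);
	struct wc *b = wc_add(&tree, 100); /* same node as a, refcount 2 */
	struct wc *c = wc_add(&tree, 400);

	printf("differentiated: %d\n", differentiated(tree)); /* 1 */
	wc_remove(&tree, c); /* weight 400 goes inactive */
	printf("differentiated: %d\n", differentiated(tree)); /* 0 */
	wc_remove(&tree, a);
	wc_remove(&tree, b);
	return 0;
}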
@@ -5293,13 +5556,17 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
 	 */
 	sl = bfqd->bfq_slice_idle;
 	/*
-	 * Unless the queue is being weight-raised, grant only minimum
-	 * idle time if the queue is seeky. A long idling is preserved
-	 * for a weight-raised queue, because it is needed for
-	 * guaranteeing to the queue its reserved share of the
-	 * throughput.
-	 */
-	if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1)
+	 * Unless the queue is being weight-raised or the scenario is
+	 * asymmetric, grant only minimum idle time if the queue
+	 * is seeky. A long idling is preserved for a weight-raised
+	 * queue, or, more in general, in an asymmetric scenario,
+	 * because a long idling is needed for guaranteeing to a queue
+	 * its reserved share of the throughput (in particular, it is
+	 * needed if the queue has a higher weight than some other
+	 * queue).
+	 */
+	if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
+	    bfq_symmetric_scenario(bfqd))
 		sl = min_t(u64, sl, BFQ_MIN_TT);
 
 	bfqd->last_idling_start = ktime_get();
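The patched guard can be read as a pure function of three queue
properties. A hedged restatement of just that predicate (the constant
is a placeholder of roughly BFQ_MIN_TT's order of magnitude, not the
kernel's definition):

#include <stdbool.h>
#include <stdint.h>

/* Placeholder for the kernel's BFQ_MIN_TT (about 2 ms, in ns). */
#define MIN_TT_NS (2ULL * 1000 * 1000)

/* Sketch of the patched policy in bfq_arm_slice_timer(): a seeky,
 * non-weight-raised queue is granted only the minimum idle slice,
 * but now only when the scenario is symmetric; in an asymmetric
 * scenario the full slice is kept to protect service guarantees. */
static uint64_t idle_slice(uint64_t slice_idle, bool seeky,
			   unsigned int wr_coeff, bool symmetric)
{
	if (seeky && wr_coeff == 1 && symmetric)
		return slice_idle < MIN_TT_NS ? slice_idle : MIN_TT_NS;
	return slice_idle;
}

int main(void)
{
	/* 8 ms slice; seeky + symmetric -> clamped to the minimum. */
	return idle_slice(8000000ULL, true, 1, true) == MIN_TT_NS ? 0 : 1;
}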
@@ -7197,6 +7464,9 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 		 * mechanism).
 		 */
 		bfqq->budget_timeout = jiffies;
+
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
 	}
 
 	now_ns = ktime_get_ns();
@@ -7627,6 +7897,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 			     HRTIMER_MODE_REL);
 	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
 
+	bfqd->queue_weights_tree = RB_ROOT;
+	bfqd->group_weights_tree = RB_ROOT;
+
 	INIT_LIST_HEAD(&bfqd->active_list);
 	INIT_LIST_HEAD(&bfqd->idle_list);
 