aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaolo Valente <paolo.valente@linaro.org>2017-07-29 06:42:56 -0400
committerJens Axboe <axboe@kernel.dk>2017-07-29 17:32:49 -0400
commit46d556e6aaa0ec4dc83648ab1ca3d01dd2fa3ea3 (patch)
treef2264556450a2eba345a5198315dddaa57fba447
parent6ab1d8da972d4c4e318607e96c5ecb32101c80f4 (diff)
block, bfq: consider also in_service_entity to state whether an entity is active
Groups of BFQ queues are represented by generic entities in BFQ. When a queue belonging to a parent entity is deactivated, the parent entity may need to be deactivated too, in case the deactivated queue was the only active queue for the parent entity. This deactivation may need to be propagated upwards if the entity belongs, in its turn, to a further higher-level entity, and so on. In particular, the upward propagation of deactivation stops at the first parent entity that remains active even if one of its child entities has been deactivated. To decide whether the last non-deactivation condition holds for a parent entity, BFQ checks whether the field next_in_service is still not NULL for the parent entity, after the deactivation of one of its child entity. If it is not NULL, then there are certainly other active entities in the parent entity, and deactivations can stop. Unfortunately, this check misses a corner case: if in_service_entity is not NULL, then next_in_service may happen to be NULL, although the parent entity is evidently active. This happens if: 1) the entity pointed by in_service_entity is the only active entity in the parent entity, and 2) according to the definition of next_in_service, the in_service_entity cannot be considered as next_in_service. See the comments on the definition of next_in_service for details on this second point. Hitting the above corner case causes crashes. To address this issue, this commit: 1) Extends the above check on only next_in_service to controlling both next_in_service and in_service_entity (if any of them is not NULL, then no further deactivation is performed) 2) Improves the (important) comments on how next_in_service is defined and updated; in particular it fixes a few rather obscure paragraphs Reported-by: Eric Wheeler <bfq-sched@lists.ewheeler.net> Reported-by: Rick Yiu <rick_yiu@htc.com> Reported-by: Tom X Nguyen <tom81094@gmail.com> Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Tested-by: Eric Wheeler <bfq-sched@lists.ewheeler.net> Tested-by: Rick Yiu <rick_yiu@htc.com> Tested-by: Laurentiu Nicola <lnicola@dend.ro> Tested-by: Tom X Nguyen <tom81094@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/bfq-iosched.h22
-rw-r--r--block/bfq-wf2q.c142
2 files changed, 95 insertions, 69 deletions
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 63e771ab56d8..859f0a8c97c8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -71,17 +71,29 @@ struct bfq_service_tree {
71 * 71 *
72 * bfq_sched_data is the basic scheduler queue. It supports three 72 * bfq_sched_data is the basic scheduler queue. It supports three
73 * ioprio_classes, and can be used either as a toplevel queue or as an 73 * ioprio_classes, and can be used either as a toplevel queue or as an
74 * intermediate queue on a hierarchical setup. @next_in_service 74 * intermediate queue in a hierarchical setup.
75 * points to the active entity of the sched_data service trees that
76 * will be scheduled next. It is used to reduce the number of steps
77 * needed for each hierarchical-schedule update.
78 * 75 *
79 * The supported ioprio_classes are the same as in CFQ, in descending 76 * The supported ioprio_classes are the same as in CFQ, in descending
80 * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. 77 * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
81 * Requests from higher priority queues are served before all the 78 * Requests from higher priority queues are served before all the
82 * requests from lower priority queues; among requests of the same 79 * requests from lower priority queues; among requests of the same
83 * queue requests are served according to B-WF2Q+. 80 * queue requests are served according to B-WF2Q+.
84 * All the fields are protected by the queue lock of the containing bfqd. 81 *
82 * The schedule is implemented by the service trees, plus the field
83 * @next_in_service, which points to the entity on the active trees
84 * that will be served next, if 1) no changes in the schedule occurs
85 * before the current in-service entity is expired, 2) the in-service
86 * queue becomes idle when it expires, and 3) if the entity pointed by
87 * in_service_entity is not a queue, then the in-service child entity
88 * of the entity pointed by in_service_entity becomes idle on
89 * expiration. This peculiar definition allows for the following
90 * optimization, not yet exploited: while a given entity is still in
91 * service, we already know which is the best candidate for next
92 * service among the other active entitities in the same parent
93 * entity. We can then quickly compare the timestamps of the
94 * in-service entity with those of such best candidate.
95 *
96 * All fields are protected by the lock of the containing bfqd.
85 */ 97 */
86struct bfq_sched_data { 98struct bfq_sched_data {
87 /* entity in service */ 99 /* entity in service */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 881bbe5e1827..911aa7431dbe 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
188 188
189/* 189/*
190 * This function tells whether entity stops being a candidate for next 190 * This function tells whether entity stops being a candidate for next
191 * service, according to the following logic. 191 * service, according to the restrictive definition of the field
192 * next_in_service. In particular, this function is invoked for an
193 * entity that is about to be set in service.
192 * 194 *
193 * This function is invoked for an entity that is about to be set in 195 * If entity is a queue, then the entity is no longer a candidate for
194 * service. If such an entity is a queue, then the entity is no longer 196 * next service according to the that definition, because entity is
195 * a candidate for next service (i.e, a candidate entity to serve 197 * about to become the in-service queue. This function then returns
196 * after the in-service entity is expired). The function then returns 198 * true if entity is a queue.
197 * true.
198 * 199 *
199 * In contrast, the entity could stil be a candidate for next service 200 * In contrast, entity could still be a candidate for next service if
200 * if it is not a queue, and has more than one child. In fact, even if 201 * it is not a queue, and has more than one active child. In fact,
201 * one of its children is about to be set in service, other children 202 * even if one of its children is about to be set in service, other
202 * may still be the next to serve. As a consequence, a non-queue 203 * active children may still be the next to serve, for the parent
203 * entity is not a candidate for next-service only if it has only one 204 * entity, even according to the above definition. As a consequence, a
204 * child. And only if this condition holds, then the function returns 205 * non-queue entity is not a candidate for next-service only if it has
205 * true for a non-queue entity. 206 * only one active child. And only if this condition holds, then this
207 * function returns true for a non-queue entity.
206 */ 208 */
207static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) 209static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
208{ 210{
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
213 215
214 bfqg = container_of(entity, struct bfq_group, entity); 216 bfqg = container_of(entity, struct bfq_group, entity);
215 217
218 /*
219 * The field active_entities does not always contain the
220 * actual number of active children entities: it happens to
221 * not account for the in-service entity in case the latter is
222 * removed from its active tree (which may get done after
223 * invoking the function bfq_no_longer_next_in_service in
224 * bfq_get_next_queue). Fortunately, here, i.e., while
225 * bfq_no_longer_next_in_service is not yet completed in
226 * bfq_get_next_queue, bfq_active_extract has not yet been
227 * invoked, and thus active_entities still coincides with the
228 * actual number of active entities.
229 */
216 if (bfqg->active_entities == 1) 230 if (bfqg->active_entities == 1)
217 return true; 231 return true;
218 232
@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
954 * one of its children receives a new request. 968 * one of its children receives a new request.
955 * 969 *
956 * Basically, this function updates the timestamps of entity and 970 * Basically, this function updates the timestamps of entity and
957 * inserts entity into its active tree, ater possible extracting it 971 * inserts entity into its active tree, ater possibly extracting it
958 * from its idle tree. 972 * from its idle tree.
959 */ 973 */
960static void __bfq_activate_entity(struct bfq_entity *entity, 974static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
1048 entity->start = entity->finish; 1062 entity->start = entity->finish;
1049 /* 1063 /*
1050 * In addition, if the entity had more than one child 1064 * In addition, if the entity had more than one child
1051 * when set in service, then was not extracted from 1065 * when set in service, then it was not extracted from
1052 * the active tree. This implies that the position of 1066 * the active tree. This implies that the position of
1053 * the entity in the active tree may need to be 1067 * the entity in the active tree may need to be
1054 * changed now, because we have just updated the start 1068 * changed now, because we have just updated the start
@@ -1056,9 +1070,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
1056 * time in a moment (the requeueing is then, more 1070 * time in a moment (the requeueing is then, more
1057 * precisely, a repositioning in this case). To 1071 * precisely, a repositioning in this case). To
1058 * implement this repositioning, we: 1) dequeue the 1072 * implement this repositioning, we: 1) dequeue the
1059 * entity here, 2) update the finish time and 1073 * entity here, 2) update the finish time and requeue
1060 * requeue the entity according to the new 1074 * the entity according to the new timestamps below.
1061 * timestamps below.
1062 */ 1075 */
1063 if (entity->tree) 1076 if (entity->tree)
1064 bfq_active_extract(st, entity); 1077 bfq_active_extract(st, entity);
@@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
1105 1118
1106 1119
1107/** 1120/**
1108 * bfq_activate_entity - activate or requeue an entity representing a bfq_queue, 1121 * bfq_activate_requeue_entity - activate or requeue an entity representing a
1109 * and activate, requeue or reposition all ancestors 1122 * bfq_queue, and activate, requeue or reposition
1110 * for which such an update becomes necessary. 1123 * all ancestors for which such an update becomes
1124 * necessary.
1111 * @entity: the entity to activate. 1125 * @entity: the entity to activate.
1112 * @non_blocking_wait_rq: true if this entity was waiting for a request 1126 * @non_blocking_wait_rq: true if this entity was waiting for a request
1113 * @requeue: true if this is a requeue, which implies that bfqq is 1127 * @requeue: true if this is a requeue, which implies that bfqq is
@@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
1135 * @ins_into_idle_tree: if false, the entity will not be put into the 1149 * @ins_into_idle_tree: if false, the entity will not be put into the
1136 * idle tree. 1150 * idle tree.
1137 * 1151 *
1138 * Deactivates an entity, independently from its previous state. Must 1152 * Deactivates an entity, independently of its previous state. Must
1139 * be invoked only if entity is on a service tree. Extracts the entity 1153 * be invoked only if entity is on a service tree. Extracts the entity
1140 * from that tree, and if necessary and allowed, puts it on the idle 1154 * from that tree, and if necessary and allowed, puts it into the idle
1141 * tree. 1155 * tree.
1142 */ 1156 */
1143bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) 1157bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
@@ -1179,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
1179/** 1193/**
1180 * bfq_deactivate_entity - deactivate an entity representing a bfq_queue. 1194 * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
1181 * @entity: the entity to deactivate. 1195 * @entity: the entity to deactivate.
1182 * @ins_into_idle_tree: true if the entity can be put on the idle tree 1196 * @ins_into_idle_tree: true if the entity can be put into the idle tree
1183 */ 1197 */
1184static void bfq_deactivate_entity(struct bfq_entity *entity, 1198static void bfq_deactivate_entity(struct bfq_entity *entity,
1185 bool ins_into_idle_tree, 1199 bool ins_into_idle_tree,
@@ -1210,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
1210 */ 1224 */
1211 bfq_update_next_in_service(sd, NULL); 1225 bfq_update_next_in_service(sd, NULL);
1212 1226
1213 if (sd->next_in_service) 1227 if (sd->next_in_service || sd->in_service_entity) {
1214 /* 1228 /*
1215 * The parent entity is still backlogged, 1229 * The parent entity is still active, because
1216 * because next_in_service is not NULL. So, no 1230 * either next_in_service or in_service_entity
1217 * further upwards deactivation must be 1231 * is not NULL. So, no further upwards
1218 * performed. Yet, next_in_service has 1232 * deactivation must be performed. Yet,
1219 * changed. Then the schedule does need to be 1233 * next_in_service has changed. Then the
1220 * updated upwards. 1234 * schedule does need to be updated upwards.
1235 *
1236 * NOTE If in_service_entity is not NULL, then
1237 * next_in_service may happen to be NULL,
1238 * although the parent entity is evidently
1239 * active. This happens if 1) the entity
1240 * pointed by in_service_entity is the only
1241 * active entity in the parent entity, and 2)
1242 * according to the definition of
1243 * next_in_service, the in_service_entity
1244 * cannot be considered as
1245 * next_in_service. See the comments on the
1246 * definition of next_in_service for details.
1221 */ 1247 */
1222 break; 1248 break;
1249 }
1223 1250
1224 /* 1251 /*
1225 * If we get here, then the parent is no more 1252 * If we get here, then the parent is no more
@@ -1496,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
1496 1523
1497 /* 1524 /*
1498 * If entity is no longer a candidate for next 1525 * If entity is no longer a candidate for next
1499 * service, then we extract it from its active tree, 1526 * service, then it must be extracted from its active
1500 * for the following reason. To further boost the 1527 * tree, so as to make sure that it won't be
1501 * throughput in some special case, BFQ needs to know 1528 * considered when computing next_in_service. See the
1502 * which is the next candidate entity to serve, while 1529 * comments on the function
1503 * there is already an entity in service. In this 1530 * bfq_no_longer_next_in_service() for details.
1504 * respect, to make it easy to compute/update the next
1505 * candidate entity to serve after the current
1506 * candidate has been set in service, there is a case
1507 * where it is necessary to extract the current
1508 * candidate from its service tree. Such a case is
1509 * when the entity just set in service cannot be also
1510 * a candidate for next service. Details about when
1511 * this conditions holds are reported in the comments
1512 * on the function bfq_no_longer_next_in_service()
1513 * invoked below.
1514 */ 1531 */
1515 if (bfq_no_longer_next_in_service(entity)) 1532 if (bfq_no_longer_next_in_service(entity))
1516 bfq_active_extract(bfq_entity_service_tree(entity), 1533 bfq_active_extract(bfq_entity_service_tree(entity),
1517 entity); 1534 entity);
1518 1535
1519 /* 1536 /*
1520 * For the same reason why we may have just extracted 1537 * Even if entity is not to be extracted according to
1521 * entity from its active tree, we may need to update 1538 * the above check, a descendant entity may get
1522 * next_in_service for the sched_data of entity too, 1539 * extracted in one of the next iterations of this
1523 * regardless of whether entity has been extracted. 1540 * loop. Such an event could cause a change in
1524 * In fact, even if entity has not been extracted, a 1541 * next_in_service for the level of the descendant
1525 * descendant entity may get extracted. Such an event 1542 * entity, and thus possibly back to this level.
1526 * would cause a change in next_in_service for the
1527 * level of the descendant entity, and thus possibly
1528 * back to upper levels.
1529 * 1543 *
1530 * We cannot perform the resulting needed update 1544 * However, we cannot perform the resulting needed
1531 * before the end of this loop, because, to know which 1545 * update of next_in_service for this level before the
1532 * is the correct next-to-serve candidate entity for 1546 * end of the whole loop, because, to know which is
1533 * each level, we need first to find the leaf entity 1547 * the correct next-to-serve candidate entity for each
1534 * to set in service. In fact, only after we know 1548 * level, we need first to find the leaf entity to set
1535 * which is the next-to-serve leaf entity, we can 1549 * in service. In fact, only after we know which is
1536 * discover whether the parent entity of the leaf 1550 * the next-to-serve leaf entity, we can discover
1537 * entity becomes the next-to-serve, and so on. 1551 * whether the parent entity of the leaf entity
1552 * becomes the next-to-serve, and so on.
1538 */ 1553 */
1539
1540 } 1554 }
1541 1555
1542 bfqq = bfq_entity_to_bfqq(entity); 1556 bfqq = bfq_entity_to_bfqq(entity);