 block/bfq-iosched.c | 387 ++++++++++++++++++++++++++----------------------
 1 file changed, 203 insertions(+), 184 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6a3d05023300..72840ebf953e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3210,7 +3210,186 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
 	bfq_remove_request(q, rq);
 }
 
-static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+/*
+ * There is a case where idling does not have to be performed for
+ * throughput concerns, but to preserve the throughput share of
+ * the process associated with bfqq.
+ *
+ * To introduce this case, we can note that allowing the drive
+ * to enqueue more than one request at a time, and hence
+ * delegating de facto final scheduling decisions to the
+ * drive's internal scheduler, entails loss of control on the
+ * actual request service order. In particular, the critical
+ * situation is when requests from different processes happen
+ * to be present, at the same time, in the internal queue(s)
+ * of the drive. In such a situation, the drive, by deciding
+ * the service order of the internally-queued requests, also
+ * determines the actual throughput distribution among
+ * these processes. But the drive typically has no notion or
+ * concern about per-process throughput distribution, and
+ * makes its decisions only on a per-request basis. Therefore,
+ * the service distribution enforced by the drive's internal
+ * scheduler is likely to coincide with the desired throughput
+ * distribution only in a completely symmetric, or favorably
+ * skewed scenario where:
+ * (i-a) each of these processes must get the same throughput as
+ *       the others,
+ * (i-b) in case (i-a) does not hold, it holds that the process
+ *       associated with bfqq must receive a lower or equal
+ *       throughput than any of the other processes;
+ * (ii)  the I/O of each process has the same properties, in
+ *       terms of locality (sequential or random), direction
+ *       (reads or writes), request sizes, greediness
+ *       (from I/O-bound to sporadic), and so on;
+ *
+ * In fact, in such a scenario, the drive tends to treat the requests
+ * of each process in about the same way as the requests of the
+ * others, and thus to provide each of these processes with about the
+ * same throughput. This is exactly the desired throughput
+ * distribution if (i-a) holds, or, if (i-b) holds instead, this is an
+ * even more convenient distribution for (the process associated with)
+ * bfqq.
+ *
+ * In contrast, in any asymmetric or unfavorable scenario, device
+ * idling (I/O-dispatch plugging) is certainly needed to guarantee
+ * that bfqq receives its assigned fraction of the device throughput
+ * (see [1] for details).
+ *
+ * The problem is that idling may significantly reduce throughput with
+ * certain combinations of types of I/O and devices. An important
+ * example is sync random I/O on flash storage with command
+ * queueing. So, unless bfqq falls in cases where idling also boosts
+ * throughput, it is important to check conditions (i-a), (i-b) and
+ * (ii) accurately, so as to avoid idling when not strictly needed for
+ * service guarantees.
+ *
+ * Unfortunately, it is extremely difficult to thoroughly check
+ * condition (ii). And, in case there are active groups, it becomes
+ * very difficult to check conditions (i-a) and (i-b) too. In fact,
+ * if there are active groups, then, for conditions (i-a) or (i-b) to
+ * become false 'indirectly', it is enough that an active group
+ * contains more active processes or sub-groups than some other active
+ * group. More precisely, for conditions (i-a) or (i-b) to become
+ * false because of such a group, it is not even necessary that the
+ * group is (still) active: it is sufficient that, even if the group
+ * has become inactive, some of its descendant processes still have
+ * some request already dispatched but still waiting for
+ * completion. In fact, requests have still to be guaranteed their
+ * share of the throughput even after being dispatched. In this
+ * respect, it is easy to show that, if a group frequently becomes
+ * inactive while still having in-flight requests, and if, when this
+ * happens, the group is not considered in the calculation of whether
+ * the scenario is asymmetric, then the group may fail to be
+ * guaranteed its fair share of the throughput (basically because
+ * idling may not be performed for the descendant processes of the
+ * group, but it had to be). We address this issue with the following
+ * bi-modal behavior, implemented in the function
+ * bfq_asymmetric_scenario().
+ *
+ * If there are groups with requests waiting for completion
+ * (as commented above, some of these groups may even be
+ * already inactive), then the scenario is tagged as
+ * asymmetric, conservatively, without checking any of the
+ * conditions (i-a), (i-b) or (ii). So the device is idled for bfqq.
+ * This behavior also matches the fact that groups are created
+ * exactly if controlling I/O is a primary concern (to
+ * preserve bandwidth and latency guarantees).
+ *
+ * On the opposite end, if there are no groups with requests waiting
+ * for completion, then only conditions (i-a) and (i-b) are actually
+ * controlled, i.e., provided that condition (i-a) or (i-b) holds,
+ * idling is not performed, regardless of whether condition (ii)
+ * holds. In other words, only if conditions (i-a) and (i-b) do not
+ * hold is idling allowed, and the device tends to be prevented
+ * from queueing many requests, possibly of several processes. Since
+ * there are no groups with requests waiting for completion, to
+ * control conditions (i-a) and (i-b) it is enough to check just
+ * whether all the queues with requests waiting for completion also
+ * have the same weight.
+ *
+ * Not checking condition (ii) evidently exposes bfqq to the
+ * risk of getting less throughput than its fair share.
+ * However, for queues with the same weight, a further
+ * mechanism, preemption, mitigates or even eliminates this
+ * problem. And it does so without consequences on overall
+ * throughput. This mechanism and its benefits are explained
+ * in the next three paragraphs.
+ *
+ * Even if a queue, say Q, is expired when it remains idle, Q
+ * can still preempt the new in-service queue if the next
+ * request of Q arrives soon (see the comments on
+ * bfq_bfqq_update_budg_for_activation). If all queues and
+ * groups have the same weight, this form of preemption,
+ * combined with the hole-recovery heuristic described in the
+ * comments on function bfq_bfqq_update_budg_for_activation,
+ * is enough to preserve a correct bandwidth distribution in
+ * the mid term, even without idling. In fact, even if not
+ * idling allows the internal queues of the device to contain
+ * many requests, and thus to reorder requests, we can rather
+ * safely assume that the internal scheduler still preserves a
+ * minimum of mid-term fairness.
+ *
+ * More precisely, this preemption-based, idleless approach
+ * provides fairness in terms of IOPS, and not sectors per
+ * second. This can be seen with a simple example. Suppose
+ * that there are two queues with the same weight, but that
+ * the first queue receives requests of 8 sectors, while the
+ * second queue receives requests of 1024 sectors. In
+ * addition, suppose that each of the two queues contains at
+ * most one request at a time, which implies that each queue
+ * always remains idle after it is served. Finally, after
+ * remaining idle, each queue receives very quickly a new
+ * request. It follows that the two queues are served
+ * alternately, preempting each other if needed. This
+ * implies that, although both queues have the same weight,
+ * the queue with large requests receives a service that is
+ * 1024/8 times as high as the service received by the other
+ * queue.
+ *
+ * The motivation for using preemption instead of idling (for
+ * queues with the same weight) is that, by not idling,
+ * service guarantees are preserved (completely or at least in
+ * part) without sacrificing throughput at all. And, if
+ * there is no active group, then the primary expectation for
+ * this device is probably a high throughput.
+ *
+ * We are now left only with explaining the additional
+ * compound condition that is checked below for deciding
+ * whether the scenario is asymmetric. To explain this
+ * compound condition, we need to add that the function
+ * bfq_asymmetric_scenario checks the weights of only
+ * non-weight-raised queues, for efficiency reasons (see
+ * comments on bfq_weights_tree_add()). Then the fact that
+ * bfqq is weight-raised is checked explicitly here. More
+ * precisely, the compound condition below takes into account
+ * also the fact that, even if bfqq is being weight-raised,
+ * the scenario is still symmetric if all queues with requests
+ * waiting for completion happen to be
+ * weight-raised. Actually, we should be even more precise
+ * here, and differentiate between interactive weight raising
+ * and soft real-time weight raising.
+ *
+ * As a side note, it is worth considering that the above
+ * device-idling countermeasures may however fail in the
+ * following unlucky scenario: if idling is (correctly)
+ * disabled in a time period during which all symmetry
+ * sub-conditions hold, and hence the device is allowed to
+ * enqueue many requests, but at some later point in time some
+ * sub-condition no longer holds, then it may become impossible
+ * to let requests be served in the desired order until all
+ * the requests already queued in the device have been served.
+ */
+static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
+						 struct bfq_queue *bfqq)
+{
+	return (bfqq->wr_coeff > 1 &&
+		bfqd->wr_busy_queues <
+		bfq_tot_busy_queues(bfqd)) ||
+		bfq_asymmetric_scenario(bfqd, bfqq);
+}
+
+static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			      enum bfqq_expiration reason)
 {
 	/*
 	 * If this bfqq is shared between multiple processes, check
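
To make the compound condition above easier to follow, here is a deliberately simplified, self-contained C sketch of the decision it encodes. All toy_* names and struct layouts are hypothetical stand-ins, not the kernel's types; the sketch only mirrors the logic described in the new comment and attributed there to bfq_asymmetric_scenario():

#include <stdbool.h>

/* Hypothetical stand-ins for struct bfq_data / struct bfq_queue. */
struct toy_bfqd {
	int wr_busy_queues;		/* busy weight-raised queues */
	int tot_busy_queues;		/* all busy queues */
	bool groups_with_pending_reqs;	/* groups with reqs waiting for completion */
	bool busy_queues_differ_in_weight;
};

struct toy_bfqq {
	unsigned int wr_coeff;		/* > 1 iff the queue is weight-raised */
};

/* Bi-modal behavior described in the comment: if any group still has
 * requests waiting for completion, conservatively tag the scenario as
 * asymmetric; otherwise only check conditions (i-a)/(i-b), i.e.,
 * whether busy queues have differing weights. */
static bool toy_asymmetric_scenario(const struct toy_bfqd *bfqd)
{
	if (bfqd->groups_with_pending_reqs)
		return true;
	return bfqd->busy_queues_differ_in_weight;
}

/* Compound condition from the patch: idle either if bfqq is
 * weight-raised while some busy queue is not (raised queues are not
 * tracked in the weights tree, so this case is checked explicitly),
 * or if the scenario is asymmetric. */
static bool toy_idling_needed(const struct toy_bfqd *bfqd,
			      const struct toy_bfqq *bfqq)
{
	return (bfqq->wr_coeff > 1 &&
		bfqd->wr_busy_queues < bfqd->tot_busy_queues) ||
	       toy_asymmetric_scenario(bfqd);
}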
@@ -3221,7 +3400,22 @@ static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
 		bfq_mark_bfqq_split_coop(bfqq);
 
-	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+	/*
+	 * Consider queues with a higher finish virtual time than
+	 * bfqq. If idling_needed_for_service_guarantees(bfqq) returns
+	 * true, then bfqq's bandwidth would be violated if an
+	 * uncontrolled amount of I/O from these queues were
+	 * dispatched while bfqq is waiting for its new I/O to
+	 * arrive. This is exactly what may happen if this is a forced
+	 * expiration caused by a preemption attempt, and if bfqq is
+	 * not re-scheduled. To prevent this from happening, re-queue
+	 * bfqq if it needs I/O-dispatch plugging, even if it is
+	 * empty. By doing so, bfqq is guaranteed to be served before
+	 * the above queues (provided that bfqq is of course eligible).
+	 */
+	if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
+	    !(reason == BFQQE_PREEMPTED &&
+	      idling_needed_for_service_guarantees(bfqd, bfqq))) {
 		if (bfqq->dispatched == 0)
 			/*
 			 * Overloading budget_timeout field to store
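
The behavioral change in this hunk can be condensed into one small predicate. A minimal sketch, again with hypothetical toy_* names rather than the kernel's, of the stay-busy rule the new condition implements:

#include <stdbool.h>

enum toy_expiration_reason {
	TOY_BUDGET_TIMEOUT,
	TOY_PREEMPTED,
	/* other reasons elided */
};

/* Returns true when an expired queue should stay on the busy list.
 * Before this patch, an empty sort_list always meant removal; now an
 * empty queue survives a preemption-forced expiration if it still
 * needs I/O-dispatch plugging for its service guarantees. */
static bool toy_stay_busy_on_expire(bool sort_list_empty,
				    enum toy_expiration_reason reason,
				    bool needs_dispatch_plugging)
{
	if (!sort_list_empty)
		return true;
	return reason == TOY_PREEMPTED && needs_dispatch_plugging;
}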
@@ -3238,7 +3432,8 @@ static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	 * Resort priority tree of potential close cooperators.
 	 * See comments on bfq_pos_tree_add_move() for the unlikely().
 	 */
-	if (unlikely(!bfqd->nonrot_with_queueing))
+	if (unlikely(!bfqd->nonrot_with_queueing &&
+		     !RB_EMPTY_ROOT(&bfqq->sort_list)))
 		bfq_pos_tree_add_move(bfqd, bfqq);
 }
 
@@ -3739,7 +3934,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	 * reason.
 	 */
 	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
-	if (__bfq_bfqq_expire(bfqd, bfqq))
+	if (__bfq_bfqq_expire(bfqd, bfqq, reason))
 		/* bfqq is gone, no more actions on it */
 		return;
 
@@ -3886,184 +4081,6 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
 }
 
 /*
- * There is a case where idling does not have to be performed for
- * throughput concerns, but to preserve the throughput share of
- * the process associated with bfqq.
- *
- * To introduce this case, we can note that allowing the drive
- * to enqueue more than one request at a time, and hence
- * delegating de facto final scheduling decisions to the
- * drive's internal scheduler, entails loss of control on the
- * actual request service order. In particular, the critical
- * situation is when requests from different processes happen
- * to be present, at the same time, in the internal queue(s)
- * of the drive. In such a situation, the drive, by deciding
- * the service order of the internally-queued requests, does
- * determine also the actual throughput distribution among
- * these processes. But the drive typically has no notion or
- * concern about per-process throughput distribution, and
- * makes its decisions only on a per-request basis. Therefore,
- * the service distribution enforced by the drive's internal
- * scheduler is likely to coincide with the desired throughput
- * distribution only in a completely symmetric, or favorably
- * skewed scenario where:
- * (i-a) each of these processes must get the same throughput as
- *       the others,
- * (i-b) in case (i-a) does not hold, it holds that the process
- *       associated with bfqq must receive a lower or equal
- *       throughput than any of the other processes;
- * (ii)  the I/O of each process has the same properties, in
- *       terms of locality (sequential or random), direction
- *       (reads or writes), request sizes, greediness
- *       (from I/O-bound to sporadic), and so on;
-
- * In fact, in such a scenario, the drive tends to treat the requests
- * of each process in about the same way as the requests of the
- * others, and thus to provide each of these processes with about the
- * same throughput. This is exactly the desired throughput
- * distribution if (i-a) holds, or, if (i-b) holds instead, this is an
- * even more convenient distribution for (the process associated with)
- * bfqq.
- *
- * In contrast, in any asymmetric or unfavorable scenario, device
- * idling (I/O-dispatch plugging) is certainly needed to guarantee
- * that bfqq receives its assigned fraction of the device throughput
- * (see [1] for details).
- *
- * The problem is that idling may significantly reduce throughput with
- * certain combinations of types of I/O and devices. An important
- * example is sync random I/O on flash storage with command
- * queueing. So, unless bfqq falls in cases where idling also boosts
- * throughput, it is important to check conditions (i-a), i(-b) and
- * (ii) accurately, so as to avoid idling when not strictly needed for
- * service guarantees.
- *
- * Unfortunately, it is extremely difficult to thoroughly check
- * condition (ii). And, in case there are active groups, it becomes
- * very difficult to check conditions (i-a) and (i-b) too. In fact,
- * if there are active groups, then, for conditions (i-a) or (i-b) to
- * become false 'indirectly', it is enough that an active group
- * contains more active processes or sub-groups than some other active
- * group. More precisely, for conditions (i-a) or (i-b) to become
- * false because of such a group, it is not even necessary that the
- * group is (still) active: it is sufficient that, even if the group
- * has become inactive, some of its descendant processes still have
- * some request already dispatched but still waiting for
- * completion. In fact, requests have still to be guaranteed their
- * share of the throughput even after being dispatched. In this
- * respect, it is easy to show that, if a group frequently becomes
- * inactive while still having in-flight requests, and if, when this
- * happens, the group is not considered in the calculation of whether
- * the scenario is asymmetric, then the group may fail to be
- * guaranteed its fair share of the throughput (basically because
- * idling may not be performed for the descendant processes of the
- * group, but it had to be). We address this issue with the following
- * bi-modal behavior, implemented in the function
- * bfq_asymmetric_scenario().
- *
- * If there are groups with requests waiting for completion
- * (as commented above, some of these groups may even be
- * already inactive), then the scenario is tagged as
- * asymmetric, conservatively, without checking any of the
- * conditions (i-a), (i-b) or (ii). So the device is idled for bfqq.
- * This behavior matches also the fact that groups are created
- * exactly if controlling I/O is a primary concern (to
- * preserve bandwidth and latency guarantees).
- *
- * On the opposite end, if there are no groups with requests waiting
- * for completion, then only conditions (i-a) and (i-b) are actually
- * controlled, i.e., provided that conditions (i-a) or (i-b) holds,
- * idling is not performed, regardless of whether condition (ii)
- * holds. In other words, only if conditions (i-a) and (i-b) do not
- * hold, then idling is allowed, and the device tends to be prevented
- * from queueing many requests, possibly of several processes. Since
- * there are no groups with requests waiting for completion, then, to
- * control conditions (i-a) and (i-b) it is enough to check just
- * whether all the queues with requests waiting for completion also
- * have the same weight.
- *
- * Not checking condition (ii) evidently exposes bfqq to the
- * risk of getting less throughput than its fair share.
- * However, for queues with the same weight, a further
- * mechanism, preemption, mitigates or even eliminates this
- * problem. And it does so without consequences on overall
- * throughput. This mechanism and its benefits are explained
- * in the next three paragraphs.
- *
- * Even if a queue, say Q, is expired when it remains idle, Q
- * can still preempt the new in-service queue if the next
- * request of Q arrives soon (see the comments on
- * bfq_bfqq_update_budg_for_activation). If all queues and
- * groups have the same weight, this form of preemption,
- * combined with the hole-recovery heuristic described in the
- * comments on function bfq_bfqq_update_budg_for_activation,
- * are enough to preserve a correct bandwidth distribution in
- * the mid term, even without idling. In fact, even if not
- * idling allows the internal queues of the device to contain
- * many requests, and thus to reorder requests, we can rather
- * safely assume that the internal scheduler still preserves a
- * minimum of mid-term fairness.
- *
- * More precisely, this preemption-based, idleless approach
- * provides fairness in terms of IOPS, and not sectors per
- * second. This can be seen with a simple example. Suppose
- * that there are two queues with the same weight, but that
- * the first queue receives requests of 8 sectors, while the
- * second queue receives requests of 1024 sectors. In
- * addition, suppose that each of the two queues contains at
- * most one request at a time, which implies that each queue
- * always remains idle after it is served. Finally, after
- * remaining idle, each queue receives very quickly a new
- * request. It follows that the two queues are served
- * alternatively, preempting each other if needed. This
- * implies that, although both queues have the same weight,
- * the queue with large requests receives a service that is
- * 1024/8 times as high as the service received by the other
- * queue.
- *
- * The motivation for using preemption instead of idling (for
- * queues with the same weight) is that, by not idling,
- * service guarantees are preserved (completely or at least in
- * part) without minimally sacrificing throughput. And, if
- * there is no active group, then the primary expectation for
- * this device is probably a high throughput.
- *
- * We are now left only with explaining the additional
- * compound condition that is checked below for deciding
- * whether the scenario is asymmetric. To explain this
- * compound condition, we need to add that the function
- * bfq_asymmetric_scenario checks the weights of only
- * non-weight-raised queues, for efficiency reasons (see
- * comments on bfq_weights_tree_add()). Then the fact that
- * bfqq is weight-raised is checked explicitly here. More
- * precisely, the compound condition below takes into account
- * also the fact that, even if bfqq is being weight-raised,
- * the scenario is still symmetric if all queues with requests
- * waiting for completion happen to be
- * weight-raised. Actually, we should be even more precise
- * here, and differentiate between interactive weight raising
- * and soft real-time weight raising.
- *
- * As a side note, it is worth considering that the above
- * device-idling countermeasures may however fail in the
- * following unlucky scenario: if idling is (correctly)
- * disabled in a time period during which all symmetry
- * sub-conditions hold, and hence the device is allowed to
- * enqueue many requests, but at some later point in time some
- * sub-condition stops to hold, then it may become impossible
- * to let requests be served in the desired order until all
- * the requests already queued in the device have been served.
- */
-static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
-						 struct bfq_queue *bfqq)
-{
-	return (bfqq->wr_coeff > 1 &&
-		bfqd->wr_busy_queues <
-		bfq_tot_busy_queues(bfqd)) ||
-		bfq_asymmetric_scenario(bfqd, bfqq);
-}
-
-/*
  * For a queue that becomes empty, device idling is allowed only if
  * this function returns true for that queue. As a consequence, since
  * device idling plays a critical role for both throughput boosting
@@ -4321,7 +4338,8 @@ check_queue:
 	    (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) {
 		struct bfq_queue *async_bfqq =
 			bfqq->bic && bfqq->bic->bfqq[0] &&
-			bfq_bfqq_busy(bfqq->bic->bfqq[0]) ?
+			bfq_bfqq_busy(bfqq->bic->bfqq[0]) &&
+			bfqq->bic->bfqq[0]->next_rq ?
 			bfqq->bic->bfqq[0] : NULL;
 
 		/*
@@ -4403,6 +4421,7 @@ check_queue:
 			bfqq = bfqq->bic->bfqq[0];
 		else if (bfq_bfqq_has_waker(bfqq) &&
 			 bfq_bfqq_busy(bfqq->waker_bfqq) &&
+			 bfqq->waker_bfqq->next_rq &&
 			 bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
 					    bfqq->waker_bfqq) <=
 			 bfq_bfqq_budget_left(bfqq->waker_bfqq)
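
This guard follows the same pattern as the async_bfqq change in the previous hunk: now that a queue may remain busy while its sort_list is empty, next_rq can be NULL for a busy queue and must be tested before being dereferenced. A small self-contained illustration of the pattern, with hypothetical toy types in place of the kernel's:

#include <stdbool.h>
#include <stddef.h>

struct toy_request {
	unsigned int nr_sectors;
};

struct toy_queue {
	bool busy;
	struct toy_request *next_rq;	/* may now be NULL even when busy */
};

/* A waker queue is only worth selecting if it is busy AND actually
 * has a next request whose service fits in the remaining budget;
 * testing next_rq first prevents a NULL dereference in the
 * bfq_serv_to_charge()-style use that follows. */
static bool toy_waker_eligible(const struct toy_queue *waker,
			       unsigned int budget_left)
{
	return waker->busy &&
	       waker->next_rq &&
	       waker->next_rq->nr_sectors <= budget_left;
}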
@@ -4800,7 +4819,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	struct hlist_node *n;
 
 	if (bfqq == bfqd->in_service_queue) {
-		__bfq_bfqq_expire(bfqd, bfqq);
+		__bfq_bfqq_expire(bfqd, bfqq, BFQQE_BUDGET_TIMEOUT);
 		bfq_schedule_dispatch(bfqd);
 	}
 
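
For completeness, a condensed sketch of how the expiration reason is threaded through after this patch. The toy_* functions are hypothetical simplifications of bfq_bfqq_expire(), __bfq_bfqq_expire() and bfq_exit_bfqq(); only the parameter flow is meant to be faithful:

#include <stdbool.h>

enum toy_reason {
	TOY_BUDGET_TIMEOUT,
	TOY_PREEMPTED,
};

/* __bfq_bfqq_expire()-like callee: the reason is now available here,
 * where the stay-busy rule sketched earlier would consult it; a true
 * return would mean the queue was freed. */
static bool toy_inner_expire(enum toy_reason reason)
{
	/* decision using 'reason' elided in this sketch */
	return false;
}

/* bfq_bfqq_expire()-like caller: forwards its own reason. */
static void toy_expire(enum toy_reason reason)
{
	if (toy_inner_expire(reason))
		return;	/* queue is gone, no more actions on it */
	/* further expiration bookkeeping elided */
}

/* bfq_exit_bfqq()-like caller: it has no natural reason of its own,
 * so it passes TOY_BUDGET_TIMEOUT, a reason that never triggers the
 * new empty-queue re-queueing. */
static void toy_exit(void)
{
	toy_expire(TOY_BUDGET_TIMEOUT);
}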