Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--	kernel/sched/fair.c	102
1 file changed, 73 insertions(+), 29 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4037e19bbca2..2fe3aa853e4d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3413,9 +3413,9 @@ void set_task_rq_fair(struct sched_entity *se,
  * _IFF_ we look at the pure running and runnable sums. Because they
  * represent the very same entity, just at different points in the hierarchy.
  *
- *
- * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and
- * simply copies the running sum over.
+ * Per the above update_tg_cfs_util() is trivial and simply copies the running
+ * sum over (but still wrong, because the group entity and group rq do not have
+ * their PELT windows aligned).
  *
  * However, update_tg_cfs_runnable() is more complex. So we have:
  *
@@ -3424,11 +3424,11 @@ void set_task_rq_fair(struct sched_entity *se,
  * And since, like util, the runnable part should be directly transferable,
  * the following would _appear_ to be the straight forward approach:
  *
- *   grq->avg.load_avg = grq->load.weight * grq->avg.running_avg	(3)
+ *   grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg	(3)
  *
  * And per (1) we have:
  *
- *   ge->avg.running_avg == grq->avg.running_avg
+ *   ge->avg.runnable_avg == grq->avg.runnable_avg
  *
  * Which gives:
  *
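[Editorial illustration, not part of the commit: a worked sketch of the naive rule that (3) plus invariant (1) would suggest, which the rest of the comment goes on to reject. The helper is hypothetical and treats runnable_avg as a fixed-point fraction of 1024.]

/* Hypothetical helper, not kernel code. */
static unsigned long naive_ge_load_avg(unsigned long ge_weight,
				       unsigned long grq_runnable_avg)
{
	/* per (1), ge->avg.runnable_avg == grq->avg.runnable_avg */
	return ge_weight * grq_runnable_avg / 1024;
}

For example, a group entity of weight 2048 over a group rq that is runnable two thirds of the time (~683/1024) would be assigned a load_avg of ~1366; the paragraphs that follow in the comment explain why this decomposition cannot actually be used.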
@@ -3447,27 +3447,28 @@ void set_task_rq_fair(struct sched_entity *se,
  * to (shortly) return to us. This only works by keeping the weights as
  * integral part of the sum. We therefore cannot decompose as per (3).
  *
- * OK, so what then?
+ * Another reason this doesn't work is that runnable isn't a 0-sum entity.
+ * Imagine a rq with 2 tasks that each are runnable 2/3 of the time. Then the
+ * rq itself is runnable anywhere between 2/3 and 1 depending on how the
+ * runnable section of these tasks overlap (or not). If they were to perfectly
+ * align the rq as a whole would be runnable 2/3 of the time. If however we
+ * always have at least 1 runnable task, the rq as a whole is always runnable.
  *
+ * So we'll have to approximate.. :/
  *
- * Another way to look at things is:
+ * Given the constraint:
  *
- *   grq->avg.load_avg = \Sum se->avg.load_avg
+ *   ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX
  *
- * Therefore, per (2):
+ * We can construct a rule that adds runnable to a rq by assuming minimal
+ * overlap.
  *
- *   grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg
+ * On removal, we'll assume each task is equally runnable; which yields:
  *
- * And the very thing we're propagating is a change in that sum (someone
- * joined/left). So we can easily know the runnable change, which would be, per
- * (2) the already tracked se->load_avg divided by the corresponding
- * se->weight.
+ *   grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight
  *
- * Basically (4) but in differential form:
+ * XXX: only do this for the part of runnable > running ?
  *
- *   d(runnable_avg) += se->avg.load_avg / se->load.weight
- *							(5)
- *   ge->avg.load_avg += ge->load.weight * d(runnable_avg)
  */
 
 static inline void
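[Editorial illustration, not part of the commit: the two approximations the new comment introduces, sketched in isolation with toy helpers rather than the kernel implementation. On addition, assume minimal overlap and clip the group entity's runnable sum at LOAD_AVG_MAX; on removal, estimate the group rq's unweighted runnable sum from its weighted load sum, assuming every task was equally runnable.]

#define TOY_LOAD_AVG_MAX	47742L

/* Addition: assume the new runnable does not overlap until the CPU saturates. */
static long toy_add_runnable(long ge_runnable_sum, long added_runnable_sum)
{
	long sum = ge_runnable_sum + added_runnable_sum;

	return sum < TOY_LOAD_AVG_MAX ? sum : TOY_LOAD_AVG_MAX;
}

/* Removal: unweighted runnable estimated from weighted load (equal runnability). */
static long toy_grq_runnable_estimate(long grq_load_sum, long grq_weight)
{
	return grq_weight ? grq_load_sum / grq_weight : 0;
}

With the comment's example, two tasks each runnable 2/3 of the time contribute roughly 2/3 * LOAD_AVG_MAX ~= 31828 each; adding the second under the minimal-overlap assumption gives min(63656, 47742) = 47742, i.e. the rq is treated as always runnable, the upper end of the 2/3..1 range described above.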
@@ -3479,6 +3480,14 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	if (!delta)
 		return;
 
+	/*
+	 * The relation between sum and avg is:
+	 *
+	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+	 *
+	 * however, the PELT windows are not aligned between grq and gse.
+	 */
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
 	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
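[Editorial illustration, not part of the commit: the relation the new comment refers to, written out as a toy conversion. It assumes the kernel's LOAD_AVG_MAX of 47742 and 1024us PELT segments; period_contrib is how far the current segment has progressed. The code above deliberately uses the LOAD_AVG_MAX upper bound instead, because the grq and gse windows are not aligned.]

/* Toy conversion from a fully decayed avg back to its sum. */
static unsigned long long toy_sum_from_avg(unsigned long avg,
					   unsigned int period_contrib)
{
	/* exact form: avg * (LOAD_AVG_MAX - 1024 + period_contrib) */
	return (unsigned long long)avg * (47742 - 1024 + period_contrib);
}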
@@ -3491,33 +3500,68 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long runnable_sum = gcfs_rq->prop_runnable_sum;
-	long runnable_load_avg, load_avg;
-	s64 runnable_load_sum, load_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	unsigned long runnable_load_avg, load_avg;
+	u64 runnable_load_sum, load_sum = 0;
+	s64 delta_sum;
 
 	if (!runnable_sum)
 		return;
 
 	gcfs_rq->prop_runnable_sum = 0;
 
+	if (runnable_sum >= 0) {
+		/*
+		 * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
+		 * the CPU is saturated running == runnable.
+		 */
+		runnable_sum += se->avg.load_sum;
+		runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+	} else {
+		/*
+		 * Estimate the new unweighted runnable_sum of the gcfs_rq by
+		 * assuming all tasks are equally runnable.
+		 */
+		if (scale_load_down(gcfs_rq->load.weight)) {
+			load_sum = div_s64(gcfs_rq->avg.load_sum,
+				scale_load_down(gcfs_rq->load.weight));
+		}
+
+		/* But make sure to not inflate se's runnable */
+		runnable_sum = min(se->avg.load_sum, load_sum);
+	}
+
+	/*
+	 * runnable_sum can't be lower than running_sum
+	 * As running sum is scale with cpu capacity wehreas the runnable sum
+	 * is not we rescale running_sum 1st
+	 */
+	running_sum = se->avg.util_sum /
+		arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+	runnable_sum = max(runnable_sum, running_sum);
+
 	load_sum = (s64)se_weight(se) * runnable_sum;
 	load_avg = div_s64(load_sum, LOAD_AVG_MAX);
 
-	add_positive(&se->avg.load_sum, runnable_sum);
-	add_positive(&se->avg.load_avg, load_avg);
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
+	delta_avg = load_avg - se->avg.load_avg;
 
-	add_positive(&cfs_rq->avg.load_avg, load_avg);
-	add_positive(&cfs_rq->avg.load_sum, load_sum);
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
 
 	runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
 	runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
+	delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
+	delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
 
-	add_positive(&se->avg.runnable_load_sum, runnable_sum);
-	add_positive(&se->avg.runnable_load_avg, runnable_load_avg);
+	se->avg.runnable_load_sum = runnable_sum;
+	se->avg.runnable_load_avg = runnable_load_avg;
 
 	if (se->on_rq) {
-		add_positive(&cfs_rq->avg.runnable_load_avg, runnable_load_avg);
-		add_positive(&cfs_rq->avg.runnable_load_sum, runnable_load_sum);
+		add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
+		add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
 	}
 }
 
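[Editorial illustration, not part of the commit: the structural change in update_tg_cfs_runnable() is a switch from blindly accumulating into both the group entity and its cfs_rq to overwriting the entity and pushing only the difference upward, so the parent aggregates are not double counted. A minimal sketch of that delta pattern follows, with toy_add_positive() standing in for the intent of the kernel's add_positive() helper.]

/* Stand-in for the kernel helper: apply a signed delta, never go negative. */
static void toy_add_positive(long *ptr, long delta)
{
	long res = *ptr + delta;

	*ptr = res < 0 ? 0 : res;
}

static void toy_propagate_sum(long *parent_sum, long *child_sum, long new_child_sum)
{
	long delta = new_child_sum - *child_sum;

	*child_sum = new_child_sum;		/* child: plain overwrite */
	toy_add_positive(parent_sum, delta);	/* parent: apply only the delta */
}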