diff options
author | Peter Zijlstra <peterz@infradead.org> | 2016-09-20 15:58:12 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-09-30 05:03:29 -0400 |
commit | b60205c7c558330e4e2b5df498355ec959457358 (patch) | |
tree | 3d54676c0af39e738ea7dded622a1685c0fc483a | |
parent | 9148a3a10e0b74c5722174a0bbef16d821f8a48b (diff) |
sched/fair: Fix min_vruntime tracking
While going through enqueue/dequeue to review the movement of
set_curr_task() I noticed that the (2nd) update_min_vruntime() call in
dequeue_entity() is suspect.
It turns out, its actually wrong because it will consider
cfs_rq->curr, which could be the entry we just normalized. This mixes
different vruntime forms and leads to fail.
The purpose of the second update_min_vruntime() is to move
min_vruntime forward if the entity we just removed is the one that was
holding it back; _except_ for the DEQUEUE_SAVE case, because then we
know its a temporary removal and it will come back.
However, since we do put_prev_task() _after_ dequeue(), cfs_rq->curr
will still be set (and per the above, can be tranformed into a
different unit), so update_min_vruntime() should also consider
curr->on_rq. This also fixes another corner case where the enqueue
(which also does update_curr()->update_min_vruntime()) happens on the
rq->lock break in schedule(), between dequeue and put_prev_task.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: 1e876231785d ("sched: Fix ->min_vruntime calculation in dequeue_entity()")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/sched/fair.c | 29 |
1 files changed, 22 insertions, 7 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a6789485fcae..543b2f291152 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -460,17 +460,23 @@ static inline int entity_before(struct sched_entity *a, | |||
460 | 460 | ||
461 | static void update_min_vruntime(struct cfs_rq *cfs_rq) | 461 | static void update_min_vruntime(struct cfs_rq *cfs_rq) |
462 | { | 462 | { |
463 | struct sched_entity *curr = cfs_rq->curr; | ||
464 | |||
463 | u64 vruntime = cfs_rq->min_vruntime; | 465 | u64 vruntime = cfs_rq->min_vruntime; |
464 | 466 | ||
465 | if (cfs_rq->curr) | 467 | if (curr) { |
466 | vruntime = cfs_rq->curr->vruntime; | 468 | if (curr->on_rq) |
469 | vruntime = curr->vruntime; | ||
470 | else | ||
471 | curr = NULL; | ||
472 | } | ||
467 | 473 | ||
468 | if (cfs_rq->rb_leftmost) { | 474 | if (cfs_rq->rb_leftmost) { |
469 | struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, | 475 | struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, |
470 | struct sched_entity, | 476 | struct sched_entity, |
471 | run_node); | 477 | run_node); |
472 | 478 | ||
473 | if (!cfs_rq->curr) | 479 | if (!curr) |
474 | vruntime = se->vruntime; | 480 | vruntime = se->vruntime; |
475 | else | 481 | else |
476 | vruntime = min_vruntime(vruntime, se->vruntime); | 482 | vruntime = min_vruntime(vruntime, se->vruntime); |
@@ -3478,9 +3484,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
3478 | account_entity_dequeue(cfs_rq, se); | 3484 | account_entity_dequeue(cfs_rq, se); |
3479 | 3485 | ||
3480 | /* | 3486 | /* |
3481 | * Normalize the entity after updating the min_vruntime because the | 3487 | * Normalize after update_curr(); which will also have moved |
3482 | * update can refer to the ->curr item and we need to reflect this | 3488 | * min_vruntime if @se is the one holding it back. But before doing |
3483 | * movement in our normalized position. | 3489 | * update_min_vruntime() again, which will discount @se's position and |
3490 | * can move min_vruntime forward still more. | ||
3484 | */ | 3491 | */ |
3485 | if (!(flags & DEQUEUE_SLEEP)) | 3492 | if (!(flags & DEQUEUE_SLEEP)) |
3486 | se->vruntime -= cfs_rq->min_vruntime; | 3493 | se->vruntime -= cfs_rq->min_vruntime; |
@@ -3488,8 +3495,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
3488 | /* return excess runtime on last dequeue */ | 3495 | /* return excess runtime on last dequeue */ |
3489 | return_cfs_rq_runtime(cfs_rq); | 3496 | return_cfs_rq_runtime(cfs_rq); |
3490 | 3497 | ||
3491 | update_min_vruntime(cfs_rq); | ||
3492 | update_cfs_shares(cfs_rq); | 3498 | update_cfs_shares(cfs_rq); |
3499 | |||
3500 | /* | ||
3501 | * Now advance min_vruntime if @se was the entity holding it back, | ||
3502 | * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be | ||
3503 | * put back on, and if we advance min_vruntime, we'll be placed back | ||
3504 | * further than we started -- ie. we'll be penalized. | ||
3505 | */ | ||
3506 | if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE) | ||
3507 | update_min_vruntime(cfs_rq); | ||
3493 | } | 3508 | } |
3494 | 3509 | ||
3495 | /* | 3510 | /* |