summaryrefslogtreecommitdiffstats
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2017-05-11 11:57:24 -0400
committerIngo Molnar <mingo@kernel.org>2017-09-29 13:35:16 -0400
commit144d8487bc6e9b741895709cb46d4e19b748a725 (patch)
tree00e02dd5dfbfa99e3be67ed6e2015bf60b7bed2f /kernel/sched/fair.c
parent1ea6c46a23f1213d1972bfae220db5c165e27bba (diff)
sched/fair: Implement synchronous PELT detach on load-balance migrate
Vincent wondered why his self migrating task had a roughly 50% dip in load_avg when landing on the new CPU. This is because we unconditionally take the asynchronous detach_entity route, which can lead to the attach on the new CPU still seeing the old CPU's contribution to tg->load_avg, effectively halving the new CPU's shares. While in general this is something we have to live with, there is the special case of runnable migration where we can do better. Tested-by: Vincent Guittot <vincent.guittot@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--kernel/sched/fair.c33
1 files changed, 21 insertions, 12 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 10d2000fca2d..92dbcc0fea46 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3746,10 +3746,6 @@ void remove_entity_load_avg(struct sched_entity *se)
3746 * Similarly for groups, they will have passed through 3746 * Similarly for groups, they will have passed through
3747 * post_init_entity_util_avg() before unregister_sched_fair_group() 3747 * post_init_entity_util_avg() before unregister_sched_fair_group()
3748 * calls this. 3748 * calls this.
3749 *
3750 * XXX in case entity_is_task(se) && task_of(se)->on_rq == MIGRATING
3751 * we could actually get the right time, since we're called with
3752 * rq->lock held, see detach_task().
3753 */ 3749 */
3754 3750
3755 sync_entity_load_avg(se); 3751 sync_entity_load_avg(se);
@@ -6292,6 +6288,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
6292 return new_cpu; 6288 return new_cpu;
6293} 6289}
6294 6290
6291static void detach_entity_cfs_rq(struct sched_entity *se);
6292
6295/* 6293/*
6296 * Called immediately before a task is migrated to a new cpu; task_cpu(p) and 6294 * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
6297 * cfs_rq_of(p) references at time of call are still valid and identify the 6295 * cfs_rq_of(p) references at time of call are still valid and identify the
@@ -6325,14 +6323,25 @@ static void migrate_task_rq_fair(struct task_struct *p)
6325 se->vruntime -= min_vruntime; 6323 se->vruntime -= min_vruntime;
6326 } 6324 }
6327 6325
6328 /* 6326 if (p->on_rq == TASK_ON_RQ_MIGRATING) {
6329 * We are supposed to update the task to "current" time, then its up to date 6327 /*
6330 * and ready to go to new CPU/cfs_rq. But we have difficulty in getting 6328 * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
6331 * what current time is, so simply throw away the out-of-date time. This 6329 * rq->lock and can modify state directly.
6332 * will result in the wakee task is less decayed, but giving the wakee more 6330 */
6333 * load sounds not bad. 6331 lockdep_assert_held(&task_rq(p)->lock);
6334 */ 6332 detach_entity_cfs_rq(&p->se);
6335 remove_entity_load_avg(&p->se); 6333
6334 } else {
6335 /*
6336 * We are supposed to update the task to "current" time, then
6337 * its up to date and ready to go to new CPU/cfs_rq. But we
6338 * have difficulty in getting what current time is, so simply
6339 * throw away the out-of-date time. This will result in the
6340 * wakee task is less decayed, but giving the wakee more load
6341 * sounds not bad.
6342 */
6343 remove_entity_load_avg(&p->se);
6344 }
6336 6345
6337 /* Tell new CPU we are migrated */ 6346 /* Tell new CPU we are migrated */
6338 p->se.avg.last_update_time = 0; 6347 p->se.avg.last_update_time = 0;