about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2016-05-11 13:27:56 -0400
committerIngo Molnar <mingo@kernel.org>2016-05-12 03:55:32 -0400
commit2f950354e6d535b892f133d20bd6a8b09430424c (patch)
tree6b83142e85d46aa0375a0e9b0907cced8e3420de
parent59efa0bac9cf8b2ef8d08f7632826c6d90f6a9bb (diff)
sched/fair: Fix fairness issue on migration
Pavan reported that in the presence of very light tasks (or cgroups) the
placement of migrated tasks can cause severe fairness issues.

The problem is that enqueue_entity() places the task before it updates
time, thereby it can place the task far in the past (remember that light
tasks will shoot virtual time forward at a high speed, so in relation to
the pre-existing light task, we can land far in the past).

This is done because update_curr() needs the current task, and we might
be placing the current task.

The obvious solution is to differentiate between the current and any
other task; placing the current before we update time, and placing any
other task after, such that !curr tasks end up at the current moment in
time, and not in the past.

This commit re-introduces the previously reverted commit:

  3a47d5124a95 ("sched/fair: Fix fairness issue on migration")

... which is now safe to do, after we've also fixed another underlying
bug first, in:

  sched/fair: Prepare to fix fairness problems on migration

and cleaned up other details in the migration code:

  sched/core: Kill sched_class::task_waking

Reported-by: Pavan Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--kernel/sched/fair.c22
1 files changed, 16 insertions, 6 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 24ce01b73906..d28d89d774aa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3288,17 +3288,27 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
+	bool curr = cfs_rq->curr == se;
+
 	/*
-	 * Update the normalized vruntime before updating min_vruntime
-	 * through calling update_curr().
+	 * If we're the current task, we must renormalise before calling
+	 * update_curr().
 	 */
-	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED))
+	if (renorm && curr)
 		se->vruntime += cfs_rq->min_vruntime;
 
+	update_curr(cfs_rq);
+
 	/*
-	 * Update run-time statistics of the 'current'.
+	 * Otherwise, renormalise after, such that we're placed at the current
+	 * moment in time, instead of some random moment in the past. Being
+	 * placed in the past could significantly boost this task to the
+	 * fairness detriment of existing tasks.
 	 */
-	update_curr(cfs_rq);
+	if (renorm && !curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3314,7 +3324,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (se != cfs_rq->curr)
+	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
 