diff options
author | Alex Shi <alex.shi@intel.com> | 2013-06-19 22:18:47 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-06-27 04:07:30 -0400 |
commit | a75cdaa915e42ef0e6f38dc7f2a6a1deca91d648 (patch) | |
tree | fe4e17990b233e5998b8372ce5be39cb9ff32a68 /kernel/sched | |
parent | fa6bddeb14d59d701f846b174b59c9982e926e66 (diff) |
sched: Set an initial value of runnable avg for new forked task
We need to initialize se.avg.{decay_count, load_avg_contrib} for a
newly forked task. Otherwise, random values in these variables cause a
mess when the new task is enqueued:
enqueue_task_fair
enqueue_entity
enqueue_entity_load_avg
and make fork balancing imbalanced due to an incorrect load_avg_contrib.
Furthermore, Morten Rasmussen noticed that some tasks were not launched at
once after being created. So Paul and Peter suggested giving the new task's
runnable avg time a start value equal to sched_slice().
PeterZ said:
> So the 'problem' is that our running avg is a 'floating' average; ie. it
> decays with time. Now we have to guess about the future of our newly
> spawned task -- something that is nigh impossible seeing these CPU
> vendors keep refusing to implement the crystal ball instruction.
>
> So there's two asymptotic cases we want to deal well with; 1) the case
> where the newly spawned program will be 'nearly' idle for its lifetime;
> and 2) the case where it's cpu-bound.
>
> Since we have to guess, we'll go for worst case and assume it's
> cpu-bound; now we don't want to make the avg so heavy adjusting to the
> near-idle case takes forever. We want to be able to quickly adjust and
> lower our running avg.
>
> Now we also don't want to make our avg too light, such that it gets
> decremented just for the new task not having had a chance to run yet --
> even if when it would run, it would be more cpu-bound than not.
>
> So what we do is we make the initial avg of the same duration as that we
> guess it takes to run each task on the system at least once -- aka
> sched_slice().
>
> Of course we can defeat this with wakeup/fork bombs, but in the 'normal'
> case it should be good enough.
Paul also contributed most of the code comments in this commit.
Signed-off-by: Alex Shi <alex.shi@intel.com>
Reviewed-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Reviewed-by: Paul Turner <pjt@google.com>
[peterz; added explanation of sched_slice() usage]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1371694737-29336-4-git-send-email-alex.shi@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 6 | ||||
-rw-r--r-- | kernel/sched/fair.c | 24 | ||||
-rw-r--r-- | kernel/sched/sched.h | 2 |
3 files changed, 28 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0241b1b55a04..729e7fc7634b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1611,10 +1611,6 @@ static void __sched_fork(struct task_struct *p) | |||
1611 | p->se.vruntime = 0; | 1611 | p->se.vruntime = 0; |
1612 | INIT_LIST_HEAD(&p->se.group_node); | 1612 | INIT_LIST_HEAD(&p->se.group_node); |
1613 | 1613 | ||
1614 | #ifdef CONFIG_SMP | ||
1615 | p->se.avg.runnable_avg_period = 0; | ||
1616 | p->se.avg.runnable_avg_sum = 0; | ||
1617 | #endif | ||
1618 | #ifdef CONFIG_SCHEDSTATS | 1614 | #ifdef CONFIG_SCHEDSTATS |
1619 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 1615 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
1620 | #endif | 1616 | #endif |
@@ -1758,6 +1754,8 @@ void wake_up_new_task(struct task_struct *p) | |||
1758 | set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); | 1754 | set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); |
1759 | #endif | 1755 | #endif |
1760 | 1756 | ||
1757 | /* Initialize new task's runnable average */ | ||
1758 | init_task_runnable_average(p); | ||
1761 | rq = __task_rq_lock(p); | 1759 | rq = __task_rq_lock(p); |
1762 | activate_task(rq, p, 0); | 1760 | activate_task(rq, p, 0); |
1763 | p->on_rq = 1; | 1761 | p->on_rq = 1; |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 36eadaaa4e5b..e1602a0fdbf8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -680,6 +680,26 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
680 | return calc_delta_fair(sched_slice(cfs_rq, se), se); | 680 | return calc_delta_fair(sched_slice(cfs_rq, se), se); |
681 | } | 681 | } |
682 | 682 | ||
683 | #ifdef CONFIG_SMP | ||
684 | static inline void __update_task_entity_contrib(struct sched_entity *se); | ||
685 | |||
686 | /* Give new task start runnable values to heavy its load in infant time */ | ||
687 | void init_task_runnable_average(struct task_struct *p) | ||
688 | { | ||
689 | u32 slice; | ||
690 | |||
691 | p->se.avg.decay_count = 0; | ||
692 | slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; | ||
693 | p->se.avg.runnable_avg_sum = slice; | ||
694 | p->se.avg.runnable_avg_period = slice; | ||
695 | __update_task_entity_contrib(&p->se); | ||
696 | } | ||
697 | #else | ||
698 | void init_task_runnable_average(struct task_struct *p) | ||
699 | { | ||
700 | } | ||
701 | #endif | ||
702 | |||
683 | /* | 703 | /* |
684 | * Update the current task's runtime statistics. Skip current tasks that | 704 | * Update the current task's runtime statistics. Skip current tasks that |
685 | * are not in our scheduling class. | 705 | * are not in our scheduling class. |
@@ -1527,6 +1547,10 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, | |||
1527 | * We track migrations using entity decay_count <= 0, on a wake-up | 1547 | * We track migrations using entity decay_count <= 0, on a wake-up |
1528 | * migration we use a negative decay count to track the remote decays | 1548 | * migration we use a negative decay count to track the remote decays |
1529 | * accumulated while sleeping. | 1549 | * accumulated while sleeping. |
1550 | * | ||
1551 | * Newly forked tasks are enqueued with se->avg.decay_count == 0, they | ||
1552 | * are seen by enqueue_entity_load_avg() as a migration with an already | ||
1553 | * constructed load_avg_contrib. | ||
1530 | */ | 1554 | */ |
1531 | if (unlikely(se->avg.decay_count <= 0)) { | 1555 | if (unlikely(se->avg.decay_count <= 0)) { |
1532 | se->avg.last_runnable_update = rq_clock_task(rq_of(cfs_rq)); | 1556 | se->avg.last_runnable_update = rq_clock_task(rq_of(cfs_rq)); |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 31d25f80a7c6..9c65d46504b1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1048,6 +1048,8 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime | |||
1048 | 1048 | ||
1049 | extern void update_idle_cpu_load(struct rq *this_rq); | 1049 | extern void update_idle_cpu_load(struct rq *this_rq); |
1050 | 1050 | ||
1051 | extern void init_task_runnable_average(struct task_struct *p); | ||
1052 | |||
1051 | #ifdef CONFIG_PARAVIRT | 1053 | #ifdef CONFIG_PARAVIRT |
1052 | static inline u64 steal_ticks(u64 steal) | 1054 | static inline u64 steal_ticks(u64 steal) |
1053 | { | 1055 | { |