author     Mike Galbraith <efault@gmx.de>              2009-03-10 14:08:11 -0400
committer  Ingo Molnar <mingo@elte.hu>                 2009-03-11 06:31:50 -0400
commit     df1c99d416500da8d26a4d78777467c53ee7689e (patch)
tree       4f61ea769c43bfc985b760649d2c4ba5089ff608
parent     57310a98a354e84279d7c8af2f48805a62372e53 (diff)
sched: add avg_overlap decay
Impact: more precise avg_overlap metric - better load-balancing
avg_overlap is used to measure the runtime overlap of the waker and
wakee.

However, when a process changes behaviour, e.g. a pipe becomes
un-congested and we no longer need to go to sleep after a wakeup
for a while, the avg_overlap value grows stale.

While a task keeps running, we use its average runtime between
preemptions as the avg_overlap sample, since the amount of runtime
correlates with the cache footprint a task builds up.

The longer a task runs, the less likely it is to benefit from being
migrated to another CPU.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1236709131.25234.576.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
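
[Note: the decay comes from the existing update_avg() helper, which the
new put_prev_task() wrapper below feeds with the clamped preemption
runtime. A minimal sketch of that helper, assuming it is the 1/8-weight
exponential moving average used elsewhere in kernel/sched.c of this era
(the exact 1/8 weight is this sketch's assumption):

	/*
	 * Sketch of update_avg(): a simple exponential moving average.
	 * Each call moves the running average 1/8 of the way toward
	 * the new sample, so old behaviour decays away geometrically.
	 */
	static void update_avg(u64 *avg, u64 sample)
	{
		s64 diff = sample - *avg;	/* signed: a sample may shrink the average */

		*avg += diff >> 3;
	}
]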
 kernel/sched.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index af5cd1b2d03e..2f28351892c9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4620,6 +4620,28 @@ static inline void schedule_debug(struct task_struct *prev)
 #endif
 }
 
+static void put_prev_task(struct rq *rq, struct task_struct *prev)
+{
+	if (prev->state == TASK_RUNNING) {
+		u64 runtime = prev->se.sum_exec_runtime;
+
+		runtime -= prev->se.prev_sum_exec_runtime;
+		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+
+		/*
+		 * In order to avoid avg_overlap growing stale when we are
+		 * indeed overlapping and hence not getting put to sleep, grow
+		 * the avg_overlap on preemption.
+		 *
+		 * We use the average preemption runtime because that
+		 * correlates to the amount of cache footprint a task can
+		 * build up.
+		 */
+		update_avg(&prev->se.avg_overlap, runtime);
+	}
+	prev->sched_class->put_prev_task(rq, prev);
+}
+
 /*
  * Pick up the highest-prio task:
  */
@@ -4698,7 +4720,7 @@ need_resched_nonpreemptible:
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
-	prev->sched_class->put_prev_task(rq, prev);
+	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 
 	if (likely(prev != next)) {
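
[Note: to see the decay path in action, here is a hedged userspace
simulation of the new put_prev_task() logic: a task whose behaviour
changed from short overlapping bursts to long uninterrupted slices has
its stale avg_overlap pulled up toward the 2*sysctl_sched_migration_cost
clamp within a handful of preemptions. The 0.5 ms migration cost and all
runtimes are values assumed for illustration, not read from this patch:

	#include <stdio.h>
	#include <stdint.h>

	/* 1/8-weight EMA, mirroring the update_avg() sketch above */
	static void update_avg(uint64_t *avg, uint64_t sample)
	{
		int64_t diff = (int64_t)(sample - *avg);

		*avg += (uint64_t)(diff >> 3);
	}

	int main(void)
	{
		uint64_t migration_cost = 500000;  /* ns; assumed for illustration */
		uint64_t avg_overlap = 10000;      /* stale "short overlap" reading */
		uint64_t runtime = 5000000;        /* task now runs 5 ms per slice */
		int tick;

		for (tick = 1; tick <= 16; tick++) {
			uint64_t sample = runtime;

			if (sample > 2 * migration_cost)   /* the patch's min_t() clamp */
				sample = 2 * migration_cost;
			update_avg(&avg_overlap, sample);
			printf("preemption %2d: avg_overlap = %7llu ns\n",
			       tick, (unsigned long long)avg_overlap);
		}
		return 0;
	}

Keeping the metric fresh matters because, in schedulers of this era, a
small avg_overlap relative to sysctl_sched_migration_cost is taken as a
hint that waker and wakee barely overlap and may benefit from an affine
wakeup; a stale value would keep steering placement long after the
workload's behaviour changed.]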