author     Venkatesh Pallipadi <venki@google.com>    2010-10-04 20:03:22 -0400
committer  Ingo Molnar <mingo@elte.hu>               2010-10-18 14:52:27 -0400
commit     aa483808516ca5cacfa0e5849691f64fec25828e
tree       38a1d9f1cb1157bd68dc2475447bacbb4b63c64a
parent     305e6835e05513406fa12820e40e4a8ecb63743c
sched: Remove irq time from available CPU power
The idea was suggested by Peter Zijlstra here:
http://marc.info/?l=linux-kernel&m=127476934517534&w=2
irq time is technically not available to the tasks running on the CPU.
This patch removes irq time from CPU power by piggybacking on
sched_rt_avg_update().
Tested this by keeping CPU X busy with a network-intensive task that spends
about 75% of a single CPU in irq processing (hard+soft) on a 4-way system,
and then starting seven cycle soakers on the system. Without this change,
there are two tasks on each CPU. With this change, there is a single task on
the irq-busy CPU X and the remaining seven tasks are spread among the other
three CPUs.
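
For intuition, here is a minimal user-space sketch (not the kernel code
itself) of what scale_rt_power() ends up computing once irq time has been
folded into rt_avg: the capacity left for fair tasks is roughly
available / (total / SCHED_LOAD_SCALE). The 0.5 s window, the 75% irq share
taken from the test above, and the names in the sketch are illustrative
assumptions; SCHED_LOAD_SCALE is taken as 1024.

/* Sketch only, not kernel code: how irq time folded into rt_avg
 * shrinks the CPU power reported by scale_rt_power(). */
#include <stdio.h>
#include <stdint.h>

#define LOAD_SCALE 1024ULL	/* stands in for SCHED_LOAD_SCALE */

/* Capacity left after rt + irq time is removed, in LOAD_SCALE units. */
static uint64_t scaled_power(uint64_t total_ns, uint64_t rt_irq_avg_ns)
{
	uint64_t available = total_ns < rt_irq_avg_ns ? 0 : total_ns - rt_irq_avg_ns;

	if (total_ns < LOAD_SCALE)
		total_ns = LOAD_SCALE;

	return available / (total_ns / LOAD_SCALE);
}

int main(void)
{
	uint64_t window = 500000000ULL;		/* assumed ~0.5 s averaging window */
	uint64_t irq = window * 75 / 100;	/* ~75% of the CPU in hard+soft irq */

	/* Prints roughly 1024 vs. 256: the irq-busy CPU advertises about a
	 * quarter of its nominal power to the load balancer. */
	printf("no irq: %llu, irq busy: %llu\n",
	       (unsigned long long)scaled_power(window, 0),
	       (unsigned long long)scaled_power(window, irq));
	return 0;
}

A CPU advertising ~256 of 1024 is treated as roughly a quarter of a CPU
during load balancing, which is why only one cycle soaker stays on the
irq-busy CPU X while the other seven spread out.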
Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1286237003-12406-8-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 kernel/sched.c          | 18 ++++++++++++++++++
 kernel/sched_fair.c     |  8 +++++++-
 kernel/sched_features.h |  5 +++++
 3 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 9e01b7100ef6..bff9ef537df0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -519,6 +519,10 @@ struct rq {
 	u64 avg_idle;
 #endif
 
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	u64 prev_irq_time;
+#endif
+
 	/* calc_load related fields */
 	unsigned long calc_load_update;
 	long calc_load_active;
@@ -643,6 +647,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 #endif /* CONFIG_CGROUP_SCHED */
 
 static u64 irq_time_cpu(int cpu);
+static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
 
 inline void update_rq_clock(struct rq *rq)
 {
@@ -654,6 +659,8 @@ inline void update_rq_clock(struct rq *rq)
 		irq_time = irq_time_cpu(cpu);
 		if (rq->clock - irq_time > rq->clock_task)
 			rq->clock_task = rq->clock - irq_time;
+
+		sched_irq_time_avg_update(rq, irq_time);
 	}
 }
 
@@ -1985,6 +1992,15 @@ void account_system_vtime(struct task_struct *curr)
 	local_irq_restore(flags);
 }
 
+static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
+{
+	if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
+		u64 delta_irq = curr_irq_time - rq->prev_irq_time;
+		rq->prev_irq_time = curr_irq_time;
+		sched_rt_avg_update(rq, delta_irq);
+	}
+}
+
 #else
 
 static u64 irq_time_cpu(int cpu)
@@ -1992,6 +2008,8 @@ static u64 irq_time_cpu(int cpu)
 	return 0;
 }
 
+static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
+
 #endif
 
 #include "sched_idletask.c"
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c358d4081b81..74cccfae87a8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2275,7 +2275,13 @@ unsigned long scale_rt_power(int cpu)
 	u64 total, available;
 
 	total = sched_avg_period() + (rq->clock - rq->age_stamp);
-	available = total - rq->rt_avg;
+
+	if (unlikely(total < rq->rt_avg)) {
+		/* Ensures that power won't end up being negative */
+		available = 0;
+	} else {
+		available = total - rq->rt_avg;
+	}
 
 	if (unlikely((s64)total < SCHED_LOAD_SCALE))
 		total = SCHED_LOAD_SCALE;
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 83c66e8ad3ee..185f920ec1a2 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -61,3 +61,8 @@ SCHED_FEAT(ASYM_EFF_LOAD, 1)
  * release the lock. Decreases scheduling overhead.
  */
 SCHED_FEAT(OWNER_SPIN, 1)
+
+/*
+ * Decrement CPU power based on irq activity
+ */
+SCHED_FEAT(NONIRQ_POWER, 1)
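
Like the other SCHED_FEAT() bits, NONIRQ_POWER can be flipped at run time
through /sys/kernel/debug/sched_features when CONFIG_SCHED_DEBUG is enabled
(writing NO_NONIRQ_POWER disables it), which makes before/after comparisons
of the test scenario in the changelog straightforward. This toggle is the
standard sched_features mechanism, not something added by this patch.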