path: root/kernel
author		Glauber Costa <glommer@redhat.com>	2011-07-11 15:28:18 -0400
committer	Avi Kivity <avi@redhat.com>	2011-07-14 05:59:47 -0400
commit		095c0aa83e52d6c3dd7168610746703921f570af (patch)
tree		7461604a6040f1724bdf66c6f87e3abe60e2ad50 /kernel
parent		e6e6685accfa81f509fadfc9624bc7c3862d75c4 (diff)
sched: adjust scheduler cpu power for stolen time
This patch makes update_rq_clock() aware of steal time. The mechanism of
operation is not different from irq_time, and follows the same principles.
It lives in a CONFIG option of its own, and can be compiled out
independently of the rest of steal time reporting. The effect of disabling
it is that the scheduler will still report steal time (that cannot be
disabled), but won't use this information for cpu power adjustments.

Every time update_rq_clock_task() is invoked, we query how much time was
stolen since the last call and feed it into sched_rt_avg_update().

Although steal time reporting in account_process_tick() keeps track of the
last time we read the steal clock, in prev_steal_time, this patch does it
independently using another field, prev_steal_time_rq. This is because
otherwise, information about time accounted in account_process_tick() would
never reach us in update_rq_clock().

Signed-off-by: Glauber Costa <glommer@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric B Munson <emunson@mgebm.net>
CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
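The following is a standalone userspace sketch, not part of this patch, of
the arithmetic the hunk below adds to update_rq_clock_task(): the raw steal
delta is clamped to the elapsed delta, rounded down to whole ticks (which is
what steal_ticks(), defined just above the changed code, is assumed to do),
and only the rounded amount is subtracted from clock_task and remembered in
prev_steal_time_rq; the sub-tick remainder is therefore picked up on a later
call. TICK_NSEC_DEMO is a stand-in for the kernel's TICK_NSEC, assuming
HZ=1000.

/*
 * Sketch only -- not kernel code. Demonstrates clamping, whole-tick
 * rounding, and the carry-over of sub-tick steal time.
 */
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC_DEMO 1000000ULL		/* 1 ms per tick (HZ=1000 assumed) */

int main(void)
{
	uint64_t delta = 4500000;		/* 4.5 ms elapsed on this rq */
	uint64_t steal = 2700000;		/* 2.7 ms reported stolen since last read */
	uint64_t prev_steal_time_rq = 0;

	if (steal > delta)			/* never charge more steal than elapsed time */
		steal = delta;

	/* whole ticks only; 2.7 ms becomes 2 ms here */
	steal = (steal / TICK_NSEC_DEMO) * TICK_NSEC_DEMO;

	prev_steal_time_rq += steal;		/* the 0.7 ms remainder is charged on a later call */
	delta -= steal;

	printf("clock_task advances by %llu ns; %llu ns fed to sched_rt_avg_update()\n",
	       (unsigned long long)delta, (unsigned long long)steal);
	return 0;
}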
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c	47
-rw-r--r--	kernel/sched_features.h	4
2 files changed, 39 insertions, 12 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index f98a28b19b2a..b35ac50b26c8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -534,6 +534,9 @@ struct rq {
 #ifdef CONFIG_PARAVIRT
 	u64 prev_steal_time;
 #endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	u64 prev_steal_time_rq;
+#endif
 
 	/* calc_load related fields */
 	unsigned long calc_load_update;
@@ -1973,8 +1976,14 @@ static inline u64 steal_ticks(u64 steal)
 
 static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	s64 irq_delta;
-
+/*
+ * In theory, the compile should just see 0 here, and optimize out the call
+ * to sched_rt_avg_update. But I don't trust it...
+ */
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+	s64 steal = 0, irq_delta = 0;
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
 	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
 
 	/*
@@ -1997,12 +2006,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 
 	rq->prev_irq_time += irq_delta;
 	delta -= irq_delta;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	if (static_branch((&paravirt_steal_rq_enabled))) {
+		u64 st;
+
+		steal = paravirt_steal_clock(cpu_of(rq));
+		steal -= rq->prev_steal_time_rq;
+
+		if (unlikely(steal > delta))
+			steal = delta;
+
+		st = steal_ticks(steal);
+		steal = st * TICK_NSEC;
+
+		rq->prev_steal_time_rq += steal;
+
+		delta -= steal;
+	}
+#endif
+
 	rq->clock_task += delta;
 
-	if (irq_delta && sched_feat(NONIRQ_POWER))
-		sched_rt_avg_update(rq, irq_delta);
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+	if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+		sched_rt_avg_update(rq, irq_delta + steal);
+#endif
 }
 
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
 static int irqtime_account_hi_update(void)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
@@ -2037,12 +2069,7 @@ static int irqtime_account_si_update(void)
 
 #define sched_clock_irqtime	(0)
 
-static void update_rq_clock_task(struct rq *rq, s64 delta)
-{
-	rq->clock_task += delta;
-}
-
-#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#endif
 
 #include "sched_idletask.c"
 #include "sched_fair.c"
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index be40f7371ee1..ca3b025f8669 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -61,9 +61,9 @@ SCHED_FEAT(LB_BIAS, 1)
 SCHED_FEAT(OWNER_SPIN, 1)
 
 /*
- * Decrement CPU power based on irq activity
+ * Decrement CPU power based on time not spent running tasks
  */
-SCHED_FEAT(NONIRQ_POWER, 1)
+SCHED_FEAT(NONTASK_POWER, 1)
 
 /*
  * Queue remote wakeups on the target CPU and process them