author    Peter Zijlstra <a.p.zijlstra@chello.nl>   2010-12-09 08:15:34 -0500
committer Ingo Molnar <mingo@elte.hu>               2010-12-16 05:17:47 -0500
commit    8e92c20183ed0579d94501311b81c42b65cb2129 (patch)
tree      3a00ccaf8222c994fd7071129dc357321c10c9df
parent    fe44d62122829959e960bc699318d58966922a69 (diff)
sched: Fix the irqtime code for 32bit
Since the irqtime accounting uses non-atomic u64 values that can be read
from remote CPUs (writes are strictly cpu local, reads are not), we
have to deal with observing partial updates.
When we do observe a partial update, the clock movement (in particular,
->clock_task movement) will go funny (in either direction), and a
subsequent clock update (observing the full update) will make it go
funny in the opposite direction.
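For illustration only, a minimal user-space sketch (not part of the patch,
just an assumed example) of how such a partial update can be observed on
32bit, where a u64 load is done as two separate 32-bit loads:

  /* Illustrative only: how a 32-bit reader can observe a torn u64 value. */
  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
  	uint64_t before = 0x00000000ffffffffULL; /* irq time before the update */
  	uint64_t after  = 0x0000000100000000ULL; /* irq time after a 1ns update */

  	/*
  	 * A reader racing with the writer can combine the new upper half
  	 * with the old lower half (or vice versa).
  	 */
  	uint64_t torn = (after & 0xffffffff00000000ULL) | (before & 0xffffffffULL);

  	printf("torn read: %#llx\n", (unsigned long long)torn); /* 0x1ffffffff */
  	return 0;
  }

The torn read is roughly 4 seconds ahead of the real value; the next
complete read then appears to move the clock backwards by the same amount.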
Since we rely on these clocks to be strictly monotonic, we cannot
suffer backwards motion. One possible solution would be to simply
ignore all backwards deltas, but that would lead to accounting
artefacts, most notably: clock_task + irq_time != clock; this
inaccuracy would end up in user-visible stats.
Therefore serialize the reads using a seqcount.
Reviewed-by: Venkatesh Pallipadi <venki@google.com>
Reported-by: Mikael Pettersson <mikpe@it.uu.se>
Tested-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1292242434.6803.200.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  kernel/sched.c | 51
1 file changed, 45 insertions(+), 6 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 79b557c63381..456c99054160 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1920,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
  * They are read and saved off onto struct rq in update_rq_clock().
  * This may result in other CPU reading this CPU's irq time and can
  * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
+ * or new value with a side effect of accounting a slice of irq time to wrong
+ * task when irq is in progress while we read rq->clock. That is a worthy
+ * compromise in place of having locks on each irq in account_system_time.
  */
 static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
@@ -1941,10 +1940,48 @@ void disable_sched_clock_irqtime(void)
 	sched_clock_irqtime = 0;
 }
 
-static inline u64 irq_time_cpu(int cpu)
+#ifndef CONFIG_64BIT
+static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+
+static inline void irq_time_write_begin(void)
+{
+	__this_cpu_inc(irq_time_seq.sequence);
+	smp_wmb();
+}
+
+static inline void irq_time_write_end(void)
+{
+	smp_wmb();
+	__this_cpu_inc(irq_time_seq.sequence);
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+	u64 irq_time;
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		irq_time = per_cpu(cpu_softirq_time, cpu) +
+			   per_cpu(cpu_hardirq_time, cpu);
+	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+
+	return irq_time;
+}
+#else /* CONFIG_64BIT */
+static inline void irq_time_write_begin(void)
+{
+}
+
+static inline void irq_time_write_end(void)
+{
+}
+
+static inline u64 irq_time_read(int cpu)
 {
 	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
 }
+#endif /* CONFIG_64BIT */
 
 /*
  * Called before incrementing preempt_count on {soft,}irq_enter
@@ -1965,6 +2002,7 @@ void account_system_vtime(struct task_struct *curr)
 	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
 	__this_cpu_add(irq_start_time, delta);
 
+	irq_time_write_begin();
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -1976,6 +2014,7 @@ void account_system_vtime(struct task_struct *curr)
 	else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
 		__this_cpu_add(cpu_softirq_time, delta);
 
+	irq_time_write_end();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
@@ -1984,7 +2023,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
 	s64 irq_delta;
 
-	irq_delta = irq_time_cpu(cpu_of(rq)) - rq->prev_irq_time;
+	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
 
 	/*
 	 * Since irq_time is only updated on {soft,}irq_exit, we might run into