diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-12-09 08:15:34 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-12-16 05:17:46 -0500 |
commit | fe44d62122829959e960bc699318d58966922a69 (patch) | |
tree | bacb96098cffa7b7b8d80506a2026b5125557f2a /kernel | |
parent | dbd87b5af055a0cc9bba17795c9a2b0d17795389 (diff) |
sched: Fix the irqtime code to deal with u64 wraps
Some ARM systems have a short sched_clock() [ which needs to be fixed
too ], but this exposed a bug in the irq_time code as well: it doesn't
deal with wraps at all.
Fix the irq_time code to deal with u64 wraps by rewriting the code to
only use delta increments, which avoids the whole issue.
Reviewed-by: Venkatesh Pallipadi <venki@google.com>
Reported-by: Mikael Pettersson <mikpe@it.uu.se>
Tested-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1292242433.6803.199.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 83 |
1 files changed, 50 insertions, 33 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index da14302a9857..79b557c63381 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -636,23 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
636 | 636 | ||
637 | #endif /* CONFIG_CGROUP_SCHED */ | 637 | #endif /* CONFIG_CGROUP_SCHED */ |
638 | 638 | ||
639 | static u64 irq_time_cpu(int cpu); | 639 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
640 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
641 | 640 | ||
642 | inline void update_rq_clock(struct rq *rq) | 641 | static void update_rq_clock(struct rq *rq) |
643 | { | 642 | { |
644 | int cpu = cpu_of(rq); | 643 | s64 delta; |
645 | u64 irq_time; | ||
646 | 644 | ||
647 | if (rq->skip_clock_update) | 645 | if (rq->skip_clock_update) |
648 | return; | 646 | return; |
649 | 647 | ||
650 | rq->clock = sched_clock_cpu(cpu); | 648 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
651 | irq_time = irq_time_cpu(cpu); | 649 | rq->clock += delta; |
652 | if (rq->clock - irq_time > rq->clock_task) | 650 | update_rq_clock_task(rq, delta); |
653 | rq->clock_task = rq->clock - irq_time; | ||
654 | |||
655 | sched_irq_time_avg_update(rq, irq_time); | ||
656 | } | 651 | } |
657 | 652 | ||
658 | /* | 653 | /* |
@@ -1946,19 +1941,20 @@ void disable_sched_clock_irqtime(void) | |||
1946 | sched_clock_irqtime = 0; | 1941 | sched_clock_irqtime = 0; |
1947 | } | 1942 | } |
1948 | 1943 | ||
1949 | static u64 irq_time_cpu(int cpu) | 1944 | static inline u64 irq_time_cpu(int cpu) |
1950 | { | 1945 | { |
1951 | if (!sched_clock_irqtime) | ||
1952 | return 0; | ||
1953 | |||
1954 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | 1946 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); |
1955 | } | 1947 | } |
1956 | 1948 | ||
1949 | /* | ||
1950 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
1951 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
1952 | */ | ||
1957 | void account_system_vtime(struct task_struct *curr) | 1953 | void account_system_vtime(struct task_struct *curr) |
1958 | { | 1954 | { |
1959 | unsigned long flags; | 1955 | unsigned long flags; |
1956 | s64 delta; | ||
1960 | int cpu; | 1957 | int cpu; |
1961 | u64 now, delta; | ||
1962 | 1958 | ||
1963 | if (!sched_clock_irqtime) | 1959 | if (!sched_clock_irqtime) |
1964 | return; | 1960 | return; |
@@ -1966,9 +1962,9 @@ void account_system_vtime(struct task_struct *curr) | |||
1966 | local_irq_save(flags); | 1962 | local_irq_save(flags); |
1967 | 1963 | ||
1968 | cpu = smp_processor_id(); | 1964 | cpu = smp_processor_id(); |
1969 | now = sched_clock_cpu(cpu); | 1965 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); |
1970 | delta = now - per_cpu(irq_start_time, cpu); | 1966 | __this_cpu_add(irq_start_time, delta); |
1971 | per_cpu(irq_start_time, cpu) = now; | 1967 | |
1972 | /* | 1968 | /* |
1973 | * We do not account for softirq time from ksoftirqd here. | 1969 | * We do not account for softirq time from ksoftirqd here. |
1974 | * We want to continue accounting softirq time to ksoftirqd thread | 1970 | * We want to continue accounting softirq time to ksoftirqd thread |
@@ -1976,33 +1972,54 @@ void account_system_vtime(struct task_struct *curr) | |||
1976 | * that do not consume any time, but still wants to run. | 1972 | * that do not consume any time, but still wants to run. |
1977 | */ | 1973 | */ |
1978 | if (hardirq_count()) | 1974 | if (hardirq_count()) |
1979 | per_cpu(cpu_hardirq_time, cpu) += delta; | 1975 | __this_cpu_add(cpu_hardirq_time, delta); |
1980 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 1976 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) |
1981 | per_cpu(cpu_softirq_time, cpu) += delta; | 1977 | __this_cpu_add(cpu_softirq_time, delta); |
1982 | 1978 | ||
1983 | local_irq_restore(flags); | 1979 | local_irq_restore(flags); |
1984 | } | 1980 | } |
1985 | EXPORT_SYMBOL_GPL(account_system_vtime); | 1981 | EXPORT_SYMBOL_GPL(account_system_vtime); |
1986 | 1982 | ||
1987 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | 1983 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
1988 | { | 1984 | { |
1989 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | 1985 | s64 irq_delta; |
1990 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | 1986 | |
1991 | rq->prev_irq_time = curr_irq_time; | 1987 | irq_delta = irq_time_cpu(cpu_of(rq)) - rq->prev_irq_time; |
1992 | sched_rt_avg_update(rq, delta_irq); | 1988 | |
1993 | } | 1989 | /* |
1990 | * Since irq_time is only updated on {soft,}irq_exit, we might run into | ||
1991 | * this case when a previous update_rq_clock() happened inside a | ||
1992 | * {soft,}irq region. | ||
1993 | * | ||
1994 | * When this happens, we stop ->clock_task and only update the | ||
1995 | * prev_irq_time stamp to account for the part that fit, so that a next | ||
1996 | * update will consume the rest. This ensures ->clock_task is | ||
1997 | * monotonic. | ||
1998 | * | ||
1999 | * It does however cause some slight miss-attribution of {soft,}irq | ||
2000 | * time, a more accurate solution would be to update the irq_time using | ||
2001 | * the current rq->clock timestamp, except that would require using | ||
2002 | * atomic ops. | ||
2003 | */ | ||
2004 | if (irq_delta > delta) | ||
2005 | irq_delta = delta; | ||
2006 | |||
2007 | rq->prev_irq_time += irq_delta; | ||
2008 | delta -= irq_delta; | ||
2009 | rq->clock_task += delta; | ||
2010 | |||
2011 | if (irq_delta && sched_feat(NONIRQ_POWER)) | ||
2012 | sched_rt_avg_update(rq, irq_delta); | ||
1994 | } | 2013 | } |
1995 | 2014 | ||
1996 | #else | 2015 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
1997 | 2016 | ||
1998 | static u64 irq_time_cpu(int cpu) | 2017 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
1999 | { | 2018 | { |
2000 | return 0; | 2019 | rq->clock_task += delta; |
2001 | } | 2020 | } |
2002 | 2021 | ||
2003 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | 2022 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
2004 | |||
2005 | #endif | ||
2006 | 2023 | ||
2007 | #include "sched_idletask.c" | 2024 | #include "sched_idletask.c" |
2008 | #include "sched_fair.c" | 2025 | #include "sched_fair.c" |