diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2017-04-25 10:10:48 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-04-27 03:08:26 -0400 |
commit | 25e2d8c1b9e327ed260edd13169cc22bc7a78bc6 (patch) | |
tree | 5b3188a9ed2b656c729820daab75993bb4d73363 | |
parent | ea839b41744dffe5c77b8d9842c9bb7073460901 (diff) |
sched/cputime: Fix ksoftirqd cputime accounting regression
irq_time_read() returns the irqtime minus the ksoftirqd time. This
is necessary because irq_time_read() is used to subtract the IRQ time
from the sum_exec_runtime of a task. If we were to include the softirq
time of ksoftirqd, this task would subtract its own CPU time every time
it updates ksoftirqd->sum_exec_runtime which would therefore never
progress.
But this behaviour got broken by:
a499a5a14db ("sched/cputime: Increment kcpustat directly on irqtime account")
... which now includes ksoftirqd softirq time in the time returned by
irq_time_read().
This has resulted in wrong ksoftirqd cputime reported to userspace
through /proc/stat and thus "top" not showing ksoftirqd when it should
after intense networking load.
ksoftirqd->stime happens to be correct but it gets scaled down by
sum_exec_runtime through task_cputime_adjusted().
To fix this, just account the strict IRQ time in a separate counter and
use it to report the IRQ time.
Reported-and-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1493129448-5356-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/sched/cputime.c | 27 | ||||
-rw-r--r-- | kernel/sched/sched.h | 9 |
2 files changed, 23 insertions, 13 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index f3778e2b46c8..aea3135c5d90 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void) | |||
34 | sched_clock_irqtime = 0; | 34 | sched_clock_irqtime = 0; |
35 | } | 35 | } |
36 | 36 | ||
37 | static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, | ||
38 | enum cpu_usage_stat idx) | ||
39 | { | ||
40 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
41 | |||
42 | u64_stats_update_begin(&irqtime->sync); | ||
43 | cpustat[idx] += delta; | ||
44 | irqtime->total += delta; | ||
45 | irqtime->tick_delta += delta; | ||
46 | u64_stats_update_end(&irqtime->sync); | ||
47 | } | ||
48 | |||
37 | /* | 49 | /* |
38 | * Called before incrementing preempt_count on {soft,}irq_enter | 50 | * Called before incrementing preempt_count on {soft,}irq_enter |
39 | * and before decrementing preempt_count on {soft,}irq_exit. | 51 | * and before decrementing preempt_count on {soft,}irq_exit. |
@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void) | |||
41 | void irqtime_account_irq(struct task_struct *curr) | 53 | void irqtime_account_irq(struct task_struct *curr) |
42 | { | 54 | { |
43 | struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); | 55 | struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); |
44 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
45 | s64 delta; | 56 | s64 delta; |
46 | int cpu; | 57 | int cpu; |
47 | 58 | ||
@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr) | |||
52 | delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; | 63 | delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; |
53 | irqtime->irq_start_time += delta; | 64 | irqtime->irq_start_time += delta; |
54 | 65 | ||
55 | u64_stats_update_begin(&irqtime->sync); | ||
56 | /* | 66 | /* |
57 | * We do not account for softirq time from ksoftirqd here. | 67 | * We do not account for softirq time from ksoftirqd here. |
58 | * We want to continue accounting softirq time to ksoftirqd thread | 68 | * We want to continue accounting softirq time to ksoftirqd thread |
59 | * in that case, so as not to confuse scheduler with a special task | 69 | * in that case, so as not to confuse scheduler with a special task |
60 | * that do not consume any time, but still wants to run. | 70 | * that do not consume any time, but still wants to run. |
61 | */ | 71 | */ |
62 | if (hardirq_count()) { | 72 | if (hardirq_count()) |
63 | cpustat[CPUTIME_IRQ] += delta; | 73 | irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); |
64 | irqtime->tick_delta += delta; | 74 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) |
65 | } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) { | 75 | irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); |
66 | cpustat[CPUTIME_SOFTIRQ] += delta; | ||
67 | irqtime->tick_delta += delta; | ||
68 | } | ||
69 | |||
70 | u64_stats_update_end(&irqtime->sync); | ||
71 | } | 76 | } |
72 | EXPORT_SYMBOL_GPL(irqtime_account_irq); | 77 | EXPORT_SYMBOL_GPL(irqtime_account_irq); |
73 | 78 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5cbf92214ad8..767aab3505a8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { } | |||
1869 | 1869 | ||
1870 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 1870 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
1871 | struct irqtime { | 1871 | struct irqtime { |
1872 | u64 total; | ||
1872 | u64 tick_delta; | 1873 | u64 tick_delta; |
1873 | u64 irq_start_time; | 1874 | u64 irq_start_time; |
1874 | struct u64_stats_sync sync; | 1875 | struct u64_stats_sync sync; |
@@ -1876,16 +1877,20 @@ struct irqtime { | |||
1876 | 1877 | ||
1877 | DECLARE_PER_CPU(struct irqtime, cpu_irqtime); | 1878 | DECLARE_PER_CPU(struct irqtime, cpu_irqtime); |
1878 | 1879 | ||
1880 | /* | ||
1881 | * Returns the irqtime minus the softirq time computed by ksoftirqd. | ||
1882 | * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime | ||
1883 | * and never move forward. | ||
1884 | */ | ||
1879 | static inline u64 irq_time_read(int cpu) | 1885 | static inline u64 irq_time_read(int cpu) |
1880 | { | 1886 | { |
1881 | struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); | 1887 | struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); |
1882 | u64 *cpustat = kcpustat_cpu(cpu).cpustat; | ||
1883 | unsigned int seq; | 1888 | unsigned int seq; |
1884 | u64 total; | 1889 | u64 total; |
1885 | 1890 | ||
1886 | do { | 1891 | do { |
1887 | seq = __u64_stats_fetch_begin(&irqtime->sync); | 1892 | seq = __u64_stats_fetch_begin(&irqtime->sync); |
1888 | total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ]; | 1893 | total = irqtime->total; |
1889 | } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); | 1894 | } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); |
1890 | 1895 | ||
1891 | return total; | 1896 | return total; |