aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2017-04-25 10:10:48 -0400
committerIngo Molnar <mingo@kernel.org>2017-04-27 03:08:26 -0400
commit25e2d8c1b9e327ed260edd13169cc22bc7a78bc6 (patch)
tree5b3188a9ed2b656c729820daab75993bb4d73363
parentea839b41744dffe5c77b8d9842c9bb7073460901 (diff)
sched/cputime: Fix ksoftirqd cputime accounting regression
irq_time_read() returns the irqtime minus the ksoftirqd time. This is necessary because irq_time_read() is used to subtract the IRQ time from the sum_exec_runtime of a task. If we were to include the softirq time of ksoftirqd, this task would subtract its own CPU time every time it updates ksoftirqd->sum_exec_runtime which would therefore never progress. But this behaviour got broken by: a499a5a14db ("sched/cputime: Increment kcpustat directly on irqtime account") ... which now includes ksoftirqd softirq time in the time returned by irq_time_read(). This has resulted in wrong ksoftirqd cputime reported to userspace through /proc/stat and thus "top" not showing ksoftirqd when it should after intense networking load. ksoftirqd->stime happens to be correct but it gets scaled down by sum_exec_runtime through task_cputime_adjusted(). To fix this, just account the strict IRQ time in a separate counter and use it to report the IRQ time. Reported-and-tested-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Reviewed-by: Rik van Riel <riel@redhat.com> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stanislaw Gruszka <sgruszka@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wanpeng Li <wanpeng.li@hotmail.com> Link: http://lkml.kernel.org/r/1493129448-5356-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--kernel/sched/cputime.c27
-rw-r--r--kernel/sched/sched.h9
2 files changed, 23 insertions, 13 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f3778e2b46c8..aea3135c5d90 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void)
 	sched_clock_irqtime = 0;
 }
 
+static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
+				  enum cpu_usage_stat idx)
+{
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+
+	u64_stats_update_begin(&irqtime->sync);
+	cpustat[idx] += delta;
+	irqtime->total += delta;
+	irqtime->tick_delta += delta;
+	u64_stats_update_end(&irqtime->sync);
+}
+
 /*
  * Called before incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void)
 void irqtime_account_irq(struct task_struct *curr)
 {
 	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
-	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	s64 delta;
 	int cpu;
 
@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr)
 	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
 	irqtime->irq_start_time += delta;
 
-	u64_stats_update_begin(&irqtime->sync);
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
 	 * in that case, so as not to confuse scheduler with a special task
	 * that do not consume any time, but still wants to run.
	 */
-	if (hardirq_count()) {
-		cpustat[CPUTIME_IRQ] += delta;
-		irqtime->tick_delta += delta;
-	} else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
-		cpustat[CPUTIME_SOFTIRQ] += delta;
-		irqtime->tick_delta += delta;
-	}
-
-	u64_stats_update_end(&irqtime->sync);
+	if (hardirq_count())
+		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
+	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5cbf92214ad8..767aab3505a8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 struct irqtime {
+	u64			total;
 	u64			tick_delta;
 	u64			irq_start_time;
 	struct u64_stats_sync	sync;
@@ -1876,16 +1877,20 @@ struct irqtime {
 
 DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
 
+/*
+ * Returns the irqtime minus the softirq time computed by ksoftirqd.
+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
+ * and never move forward.
+ */
 static inline u64 irq_time_read(int cpu)
 {
 	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
-	u64 *cpustat = kcpustat_cpu(cpu).cpustat;
 	unsigned int seq;
 	u64 total;
 
 	do {
 		seq = __u64_stats_fetch_begin(&irqtime->sync);
-		total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
+		total = irqtime->total;
 	} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
 
 	return total;