diff options
author | Venkatesh Pallipadi <venki@google.com> | 2010-10-04 20:03:16 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-10-18 14:52:20 -0400 |
commit | 75e1056f5c57050415b64cb761a3acc35d91f013 (patch) | |
tree | e7cd483e38f9bf9131a3b212301a009890b8de49 | |
parent | 75dd321d79d495a0ee579e6249ebc38ddbb2667f (diff) |
sched: Fix softirq time accounting
Peter Zijlstra found a bug in the way softirq time is accounted in
VIRT_CPU_ACCOUNTING on this thread:
http://lkml.indiana.edu/hypermail//linux/kernel/1009.2/01366.html
The problem is, softirq processing uses local_bh_disable internally. There
is no way, later in the flow, to differentiate between whether softirq is
being processed or is it just that bh has been disabled. So, a hardirq when bh
is disabled results in time being wrongly accounted as softirq.
Looking at the code a bit more, the problem exists in !VIRT_CPU_ACCOUNTING
as well. As account_system_time() in normal tick based accouting also uses
softirq_count, which will be set even when not in softirq with bh disabled.
Peter also suggested solution of using 2*SOFTIRQ_OFFSET as irq count
for local_bh_{disable,enable} and using just SOFTIRQ_OFFSET while softirq
processing. The patch below does that and adds API in_serving_softirq() which
returns whether we are currently processing softirq or not.
Also changes one of the usages of softirq_count in net/sched/cls_cgroup.c
to in_serving_softirq.
Looks like many usages of in_softirq really want in_serving_softirq. Those
changes can be made individually on a case by case basis.
Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1286237003-12406-2-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/hardirq.h | 5 | ||||
-rw-r--r-- | include/linux/sched.h | 6 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/softirq.c | 51 | ||||
-rw-r--r-- | net/sched/cls_cgroup.c | 2 |
5 files changed, 44 insertions, 22 deletions
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669dab..e37a77cbd588 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
@@ -64,6 +64,8 @@ | |||
64 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | 64 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) |
65 | #define NMI_OFFSET (1UL << NMI_SHIFT) | 65 | #define NMI_OFFSET (1UL << NMI_SHIFT) |
66 | 66 | ||
67 | #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | ||
68 | |||
67 | #ifndef PREEMPT_ACTIVE | 69 | #ifndef PREEMPT_ACTIVE |
68 | #define PREEMPT_ACTIVE_BITS 1 | 70 | #define PREEMPT_ACTIVE_BITS 1 |
69 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) | 71 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) |
@@ -82,10 +84,13 @@ | |||
82 | /* | 84 | /* |
83 | * Are we doing bottom half or hardware interrupt processing? | 85 | * Are we doing bottom half or hardware interrupt processing? |
84 | * Are we in a softirq context? Interrupt context? | 86 | * Are we in a softirq context? Interrupt context? |
87 | * in_softirq - Are we currently processing softirq or have bh disabled? | ||
88 | * in_serving_softirq - Are we currently processing softirq? | ||
85 | */ | 89 | */ |
86 | #define in_irq() (hardirq_count()) | 90 | #define in_irq() (hardirq_count()) |
87 | #define in_softirq() (softirq_count()) | 91 | #define in_softirq() (softirq_count()) |
88 | #define in_interrupt() (irq_count()) | 92 | #define in_interrupt() (irq_count()) |
93 | #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | ||
89 | 94 | ||
90 | /* | 95 | /* |
91 | * Are we in NMI context? | 96 | * Are we in NMI context? |
diff --git a/include/linux/sched.h b/include/linux/sched.h index cdf56693ecbf..8744e50cb083 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2366,9 +2366,9 @@ extern int __cond_resched_lock(spinlock_t *lock); | |||
2366 | 2366 | ||
2367 | extern int __cond_resched_softirq(void); | 2367 | extern int __cond_resched_softirq(void); |
2368 | 2368 | ||
2369 | #define cond_resched_softirq() ({ \ | 2369 | #define cond_resched_softirq() ({ \ |
2370 | __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ | 2370 | __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ |
2371 | __cond_resched_softirq(); \ | 2371 | __cond_resched_softirq(); \ |
2372 | }) | 2372 | }) |
2373 | 2373 | ||
2374 | /* | 2374 | /* |
diff --git a/kernel/sched.c b/kernel/sched.c index 771b518e5f1f..089be8adb074 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3422,7 +3422,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3422 | tmp = cputime_to_cputime64(cputime); | 3422 | tmp = cputime_to_cputime64(cputime); |
3423 | if (hardirq_count() - hardirq_offset) | 3423 | if (hardirq_count() - hardirq_offset) |
3424 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3424 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
3425 | else if (softirq_count()) | 3425 | else if (in_serving_softirq()) |
3426 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3426 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
3427 | else | 3427 | else |
3428 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3428 | cpustat->system = cputime64_add(cpustat->system, tmp); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 07b4f1b1a73a..988dfbe6bbe8 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -77,11 +77,21 @@ void wakeup_softirqd(void) | |||
77 | } | 77 | } |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * preempt_count and SOFTIRQ_OFFSET usage: | ||
81 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | ||
82 | * softirq processing. | ||
83 | * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) | ||
84 | * on local_bh_disable or local_bh_enable. | ||
85 | * This lets us distinguish between whether we are currently processing | ||
86 | * softirq and whether we just have bh disabled. | ||
87 | */ | ||
88 | |||
89 | /* | ||
80 | * This one is for softirq.c-internal use, | 90 | * This one is for softirq.c-internal use, |
81 | * where hardirqs are disabled legitimately: | 91 | * where hardirqs are disabled legitimately: |
82 | */ | 92 | */ |
83 | #ifdef CONFIG_TRACE_IRQFLAGS | 93 | #ifdef CONFIG_TRACE_IRQFLAGS |
84 | static void __local_bh_disable(unsigned long ip) | 94 | static void __local_bh_disable(unsigned long ip, unsigned int cnt) |
85 | { | 95 | { |
86 | unsigned long flags; | 96 | unsigned long flags; |
87 | 97 | ||
@@ -95,32 +105,43 @@ static void __local_bh_disable(unsigned long ip) | |||
95 | * We must manually increment preempt_count here and manually | 105 | * We must manually increment preempt_count here and manually |
96 | * call the trace_preempt_off later. | 106 | * call the trace_preempt_off later. |
97 | */ | 107 | */ |
98 | preempt_count() += SOFTIRQ_OFFSET; | 108 | preempt_count() += cnt; |
99 | /* | 109 | /* |
100 | * Were softirqs turned off above: | 110 | * Were softirqs turned off above: |
101 | */ | 111 | */ |
102 | if (softirq_count() == SOFTIRQ_OFFSET) | 112 | if (softirq_count() == cnt) |
103 | trace_softirqs_off(ip); | 113 | trace_softirqs_off(ip); |
104 | raw_local_irq_restore(flags); | 114 | raw_local_irq_restore(flags); |
105 | 115 | ||
106 | if (preempt_count() == SOFTIRQ_OFFSET) | 116 | if (preempt_count() == cnt) |
107 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | 117 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); |
108 | } | 118 | } |
109 | #else /* !CONFIG_TRACE_IRQFLAGS */ | 119 | #else /* !CONFIG_TRACE_IRQFLAGS */ |
110 | static inline void __local_bh_disable(unsigned long ip) | 120 | static inline void __local_bh_disable(unsigned long ip, unsigned int cnt) |
111 | { | 121 | { |
112 | add_preempt_count(SOFTIRQ_OFFSET); | 122 | add_preempt_count(cnt); |
113 | barrier(); | 123 | barrier(); |
114 | } | 124 | } |
115 | #endif /* CONFIG_TRACE_IRQFLAGS */ | 125 | #endif /* CONFIG_TRACE_IRQFLAGS */ |
116 | 126 | ||
117 | void local_bh_disable(void) | 127 | void local_bh_disable(void) |
118 | { | 128 | { |
119 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 129 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
130 | SOFTIRQ_DISABLE_OFFSET); | ||
120 | } | 131 | } |
121 | 132 | ||
122 | EXPORT_SYMBOL(local_bh_disable); | 133 | EXPORT_SYMBOL(local_bh_disable); |
123 | 134 | ||
135 | static void __local_bh_enable(unsigned int cnt) | ||
136 | { | ||
137 | WARN_ON_ONCE(in_irq()); | ||
138 | WARN_ON_ONCE(!irqs_disabled()); | ||
139 | |||
140 | if (softirq_count() == cnt) | ||
141 | trace_softirqs_on((unsigned long)__builtin_return_address(0)); | ||
142 | sub_preempt_count(cnt); | ||
143 | } | ||
144 | |||
124 | /* | 145 | /* |
125 | * Special-case - softirqs can safely be enabled in | 146 | * Special-case - softirqs can safely be enabled in |
126 | * cond_resched_softirq(), or by __do_softirq(), | 147 | * cond_resched_softirq(), or by __do_softirq(), |
@@ -128,12 +149,7 @@ EXPORT_SYMBOL(local_bh_disable); | |||
128 | */ | 149 | */ |
129 | void _local_bh_enable(void) | 150 | void _local_bh_enable(void) |
130 | { | 151 | { |
131 | WARN_ON_ONCE(in_irq()); | 152 | __local_bh_enable(SOFTIRQ_DISABLE_OFFSET); |
132 | WARN_ON_ONCE(!irqs_disabled()); | ||
133 | |||
134 | if (softirq_count() == SOFTIRQ_OFFSET) | ||
135 | trace_softirqs_on((unsigned long)__builtin_return_address(0)); | ||
136 | sub_preempt_count(SOFTIRQ_OFFSET); | ||
137 | } | 153 | } |
138 | 154 | ||
139 | EXPORT_SYMBOL(_local_bh_enable); | 155 | EXPORT_SYMBOL(_local_bh_enable); |
@@ -147,13 +163,13 @@ static inline void _local_bh_enable_ip(unsigned long ip) | |||
147 | /* | 163 | /* |
148 | * Are softirqs going to be turned on now: | 164 | * Are softirqs going to be turned on now: |
149 | */ | 165 | */ |
150 | if (softirq_count() == SOFTIRQ_OFFSET) | 166 | if (softirq_count() == SOFTIRQ_DISABLE_OFFSET) |
151 | trace_softirqs_on(ip); | 167 | trace_softirqs_on(ip); |
152 | /* | 168 | /* |
153 | * Keep preemption disabled until we are done with | 169 | * Keep preemption disabled until we are done with |
154 | * softirq processing: | 170 | * softirq processing: |
155 | */ | 171 | */ |
156 | sub_preempt_count(SOFTIRQ_OFFSET - 1); | 172 | sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1); |
157 | 173 | ||
158 | if (unlikely(!in_interrupt() && local_softirq_pending())) | 174 | if (unlikely(!in_interrupt() && local_softirq_pending())) |
159 | do_softirq(); | 175 | do_softirq(); |
@@ -198,7 +214,8 @@ asmlinkage void __do_softirq(void) | |||
198 | pending = local_softirq_pending(); | 214 | pending = local_softirq_pending(); |
199 | account_system_vtime(current); | 215 | account_system_vtime(current); |
200 | 216 | ||
201 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 217 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
218 | SOFTIRQ_OFFSET); | ||
202 | lockdep_softirq_enter(); | 219 | lockdep_softirq_enter(); |
203 | 220 | ||
204 | cpu = smp_processor_id(); | 221 | cpu = smp_processor_id(); |
@@ -245,7 +262,7 @@ restart: | |||
245 | lockdep_softirq_exit(); | 262 | lockdep_softirq_exit(); |
246 | 263 | ||
247 | account_system_vtime(current); | 264 | account_system_vtime(current); |
248 | _local_bh_enable(); | 265 | __local_bh_enable(SOFTIRQ_OFFSET); |
249 | } | 266 | } |
250 | 267 | ||
251 | #ifndef __ARCH_HAS_DO_SOFTIRQ | 268 | #ifndef __ARCH_HAS_DO_SOFTIRQ |
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 78ef2c5e130b..37dff78e9cb1 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c | |||
@@ -123,7 +123,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
123 | * calls by looking at the number of nested bh disable calls because | 123 | * calls by looking at the number of nested bh disable calls because |
124 | * softirqs always disables bh. | 124 | * softirqs always disables bh. |
125 | */ | 125 | */ |
126 | if (softirq_count() != SOFTIRQ_OFFSET) { | 126 | if (in_serving_softirq()) { |
127 | /* If there is an sk_classid we'll use that. */ | 127 | /* If there is an sk_classid we'll use that. */ |
128 | if (!skb->sk) | 128 | if (!skb->sk) |
129 | return -1; | 129 | return -1; |