author     Frederic Weisbecker <fweisbec@gmail.com>    2010-04-05 18:10:17 -0400
committer  Ingo Molnar <mingo@elte.hu>                 2010-04-05 18:15:37 -0400
commit     bd6d29c25bb1a24a4c160ec5de43e0004e01f72b (patch)
tree       0aa96c7e9fdfbe7dc9c7e40151aed928903240f0 /kernel
parent     ced918eb748ce30b3aace549fd17540e40ffdca0 (diff)
lockstat: Make lockstat counting per cpu
Locking statistics are implemented using global atomic
variables. This is usually fine unless some path writes them very
often.
This is the case for the function and function graph tracers,
which disable irqs for each entry saved (except when the function
tracer runs in preempt-disable-only mode).
Each call to local_irq_save/restore() then increments the
hardirqs_on_events and hardirqs_off_events stats (or the
equivalent redundant_* stats).
Incrementing these global variables for every traced function
causes excessive cache-line bouncing when lockstat is enabled.
To solve this, implement the debug_atomic_*() operations using
per-cpu variables.
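
For illustration only, here is a minimal sketch of the per-cpu counting
scheme this patch adopts; the struct, variable, and function names below
are hypothetical, while the real patch uses the lockdep_stats structure
and the debug_atomic_*() macros shown in the diff. The writer bumps its
own CPU's counter with irqs disabled, and readers sum over all possible
CPUs:

/*
 * Illustrative sketch (not part of the patch): per-cpu event counters
 * that are cheap to bump on the fast path and summed only when read.
 * All names here are hypothetical.
 */
#include <linux/percpu.h>
#include <linux/irqflags.h>
#include <linux/bug.h>

struct example_stats {
        int hardirqs_on_events;
};

static DEFINE_PER_CPU(struct example_stats, example_stats);

/* Fast path: caller runs with irqs disabled, so a plain ++ is race-free. */
static inline void example_inc_hardirqs_on(void)
{
        WARN_ON_ONCE(!irqs_disabled());
        __get_cpu_var(example_stats).hardirqs_on_events++;
}

/* Slow path (e.g. /proc output): fold the per-cpu slots into one total. */
static inline unsigned long long example_read_hardirqs_on(void)
{
        unsigned long long total = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                total += per_cpu(example_stats, cpu).hardirqs_on_events;

        return total;
}

Reads may tear against concurrent increments on other CPUs, but for debug
statistics an approximate sum is acceptable, which is why no atomics or
local_t are needed.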
-v2: Use per_cpu() instead of get_cpu_var() to fetch the desired
cpu vars on debug_atomic_read()
-v3: Store the stats in a structure. No need for local_t as we
are NMI/irq safe.
-v4: Fix tons of build errors. I thought I had tested it but I
probably forgot to select the relevant config.
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1270505417-8144-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/lockdep.c             47
-rw-r--r--   kernel/lockdep_internals.h   74
-rw-r--r--   kernel/lockdep_proc.c        58
3 files changed, 99 insertions, 80 deletions
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d0455de1..069af0276bf7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -430,20 +430,7 @@ static struct stack_trace lockdep_init_trace = {
 /*
  * Various lockdep statistics:
  */
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_backwards_checks;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
 #endif
 
 /*
@@ -758,7 +745,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
                 return NULL;
         }
         class = lock_classes + nr_lock_classes++;
-        debug_atomic_inc(&nr_unused_locks);
+        debug_atomic_inc(nr_unused_locks);
         class->key = key;
         class->name = lock->name;
         class->subclass = subclass;
@@ -1215,7 +1202,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
 {
         int result;
 
-        debug_atomic_inc(&nr_cyclic_checks);
+        debug_atomic_inc(nr_cyclic_checks);
 
         result = __bfs_forwards(root, target, class_equal, target_entry);
 
@@ -1252,7 +1239,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
 {
         int result;
 
-        debug_atomic_inc(&nr_find_usage_forwards_checks);
+        debug_atomic_inc(nr_find_usage_forwards_checks);
 
         result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1275,7 +1262,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
 {
         int result;
 
-        debug_atomic_inc(&nr_find_usage_backwards_checks);
+        debug_atomic_inc(nr_find_usage_backwards_checks);
 
         result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1835,7 +1822,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
         list_for_each_entry(chain, hash_head, entry) {
                 if (chain->chain_key == chain_key) {
 cache_hit:
-                        debug_atomic_inc(&chain_lookup_hits);
+                        debug_atomic_inc(chain_lookup_hits);
                         if (very_verbose(class))
                                 printk("\nhash chain already cached, key: "
                                         "%016Lx tail class: [%p] %s\n",
@@ -1900,7 +1887,7 @@ cache_hit:
                 chain_hlocks[chain->base + j] = class - lock_classes;
         }
         list_add_tail_rcu(&chain->entry, hash_head);
-        debug_atomic_inc(&chain_lookup_misses);
+        debug_atomic_inc(chain_lookup_misses);
         inc_chains();
 
         return 1;
@@ -2321,7 +2308,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
                 return;
 
         if (unlikely(curr->hardirqs_enabled)) {
-                debug_atomic_inc(&redundant_hardirqs_on);
+                debug_atomic_inc(redundant_hardirqs_on);
                 return;
         }
         /* we'll do an OFF -> ON transition: */
@@ -2348,7 +2335,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 
         curr->hardirq_enable_ip = ip;
         curr->hardirq_enable_event = ++curr->irq_events;
-        debug_atomic_inc(&hardirqs_on_events);
+        debug_atomic_inc(hardirqs_on_events);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
@@ -2380,9 +2367,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
                 curr->hardirqs_enabled = 0;
                 curr->hardirq_disable_ip = ip;
                 curr->hardirq_disable_event = ++curr->irq_events;
-                debug_atomic_inc(&hardirqs_off_events);
+                debug_atomic_inc(hardirqs_off_events);
         } else
-                debug_atomic_inc(&redundant_hardirqs_off);
+                debug_atomic_inc(redundant_hardirqs_off);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
@@ -2406,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip)
                 return;
 
         if (curr->softirqs_enabled) {
-                debug_atomic_inc(&redundant_softirqs_on);
+                debug_atomic_inc(redundant_softirqs_on);
                 return;
         }
 
@@ -2416,7 +2403,7 @@ void trace_softirqs_on(unsigned long ip)
         curr->softirqs_enabled = 1;
         curr->softirq_enable_ip = ip;
         curr->softirq_enable_event = ++curr->irq_events;
-        debug_atomic_inc(&softirqs_on_events);
+        debug_atomic_inc(softirqs_on_events);
         /*
          * We are going to turn softirqs on, so set the
          * usage bit for all held locks, if hardirqs are
@@ -2446,10 +2433,10 @@ void trace_softirqs_off(unsigned long ip)
                 curr->softirqs_enabled = 0;
                 curr->softirq_disable_ip = ip;
                 curr->softirq_disable_event = ++curr->irq_events;
-                debug_atomic_inc(&softirqs_off_events);
+                debug_atomic_inc(softirqs_off_events);
                 DEBUG_LOCKS_WARN_ON(!softirq_count());
         } else
-                debug_atomic_inc(&redundant_softirqs_off);
+                debug_atomic_inc(redundant_softirqs_off);
 }
 
 static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2654,7 +2641,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
                         return 0;
                 break;
         case LOCK_USED:
-                debug_atomic_dec(&nr_unused_locks);
+                debug_atomic_dec(nr_unused_locks);
                 break;
         default:
                 if (!debug_locks_off_graph_unlock())
@@ -2760,7 +2747,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                 if (!class)
                         return 0;
         }
-        debug_atomic_inc((atomic_t *)&class->ops);
+        atomic_inc((atomic_t *)&class->ops);
         if (very_verbose(class)) {
                 printk("\nacquire class [%p] %s", class->key, class->name);
                 if (class->name_version > 1)
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2ee95ad1313..8d7d4b6c741a 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -110,29 +110,61 @@ lockdep_count_backward_deps(struct lock_class *class)
 #endif
 
 #ifdef CONFIG_DEBUG_LOCKDEP
+
+#include <asm/local.h>
 /*
- * Various lockdep statistics:
+ * Various lockdep statistics.
+ * We want them per cpu as they are often accessed in fast path
+ * and we want to avoid too much cache bouncing.
  */
-extern atomic_t chain_lookup_hits;
-extern atomic_t chain_lookup_misses;
-extern atomic_t hardirqs_on_events;
-extern atomic_t hardirqs_off_events;
-extern atomic_t redundant_hardirqs_on;
-extern atomic_t redundant_hardirqs_off;
-extern atomic_t softirqs_on_events;
-extern atomic_t softirqs_off_events;
-extern atomic_t redundant_softirqs_on;
-extern atomic_t redundant_softirqs_off;
-extern atomic_t nr_unused_locks;
-extern atomic_t nr_cyclic_checks;
-extern atomic_t nr_cyclic_check_recursions;
-extern atomic_t nr_find_usage_forwards_checks;
-extern atomic_t nr_find_usage_forwards_recursions;
-extern atomic_t nr_find_usage_backwards_checks;
-extern atomic_t nr_find_usage_backwards_recursions;
-# define debug_atomic_inc(ptr)          atomic_inc(ptr)
-# define debug_atomic_dec(ptr)          atomic_dec(ptr)
-# define debug_atomic_read(ptr)         atomic_read(ptr)
+struct lockdep_stats {
+        int     chain_lookup_hits;
+        int     chain_lookup_misses;
+        int     hardirqs_on_events;
+        int     hardirqs_off_events;
+        int     redundant_hardirqs_on;
+        int     redundant_hardirqs_off;
+        int     softirqs_on_events;
+        int     softirqs_off_events;
+        int     redundant_softirqs_on;
+        int     redundant_softirqs_off;
+        int     nr_unused_locks;
+        int     nr_cyclic_checks;
+        int     nr_cyclic_check_recursions;
+        int     nr_find_usage_forwards_checks;
+        int     nr_find_usage_forwards_recursions;
+        int     nr_find_usage_backwards_checks;
+        int     nr_find_usage_backwards_recursions;
+};
+
+DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+
+#define debug_atomic_inc(ptr)                   {               \
+        struct lockdep_stats *__cpu_lockdep_stats;              \
+                                                                \
+        WARN_ON_ONCE(!irqs_disabled());                         \
+        __cpu_lockdep_stats = &__get_cpu_var(lockdep_stats);    \
+        __cpu_lockdep_stats->ptr++;                             \
+}
+
+#define debug_atomic_dec(ptr)                   {               \
+        struct lockdep_stats *__cpu_lockdep_stats;              \
+                                                                \
+        WARN_ON_ONCE(!irqs_disabled());                         \
+        __cpu_lockdep_stats = &__get_cpu_var(lockdep_stats);    \
+        __cpu_lockdep_stats->ptr--;                             \
+}
+
+#define debug_atomic_read(ptr)          ({                              \
+        struct lockdep_stats *__cpu_lockdep_stats;                      \
+        unsigned long long __total = 0;                                 \
+        int __cpu;                                                      \
+        for_each_possible_cpu(__cpu) {                                  \
+                __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu);   \
+                __total += __cpu_lockdep_stats->ptr;                    \
+        }                                                               \
+        __total;                                                        \
+})
 #else
 # define debug_atomic_inc(ptr)          do { } while (0)
 # define debug_atomic_dec(ptr)          do { } while (0)
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d4aba4f3584c..59b76c8ce9d7 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = {
 static void lockdep_stats_debug_show(struct seq_file *m)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
-        unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
-                hi2 = debug_atomic_read(&hardirqs_off_events),
-                hr1 = debug_atomic_read(&redundant_hardirqs_on),
-                hr2 = debug_atomic_read(&redundant_hardirqs_off),
-                si1 = debug_atomic_read(&softirqs_on_events),
-                si2 = debug_atomic_read(&softirqs_off_events),
-                sr1 = debug_atomic_read(&redundant_softirqs_on),
-                sr2 = debug_atomic_read(&redundant_softirqs_off);
+        unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
+                hi2 = debug_atomic_read(hardirqs_off_events),
+                hr1 = debug_atomic_read(redundant_hardirqs_on),
+                hr2 = debug_atomic_read(redundant_hardirqs_off),
+                si1 = debug_atomic_read(softirqs_on_events),
+                si2 = debug_atomic_read(softirqs_off_events),
+                sr1 = debug_atomic_read(redundant_softirqs_on),
+                sr2 = debug_atomic_read(redundant_softirqs_off);
 
-        seq_printf(m, " chain lookup misses: %11u\n",
-                debug_atomic_read(&chain_lookup_misses));
-        seq_printf(m, " chain lookup hits: %11u\n",
-                debug_atomic_read(&chain_lookup_hits));
-        seq_printf(m, " cyclic checks: %11u\n",
-                debug_atomic_read(&nr_cyclic_checks));
-        seq_printf(m, " find-mask forwards checks: %11u\n",
-                debug_atomic_read(&nr_find_usage_forwards_checks));
-        seq_printf(m, " find-mask backwards checks: %11u\n",
-                debug_atomic_read(&nr_find_usage_backwards_checks));
+        seq_printf(m, " chain lookup misses: %11llu\n",
+                debug_atomic_read(chain_lookup_misses));
+        seq_printf(m, " chain lookup hits: %11llu\n",
+                debug_atomic_read(chain_lookup_hits));
+        seq_printf(m, " cyclic checks: %11llu\n",
+                debug_atomic_read(nr_cyclic_checks));
+        seq_printf(m, " find-mask forwards checks: %11llu\n",
+                debug_atomic_read(nr_find_usage_forwards_checks));
+        seq_printf(m, " find-mask backwards checks: %11llu\n",
+                debug_atomic_read(nr_find_usage_backwards_checks));
 
-        seq_printf(m, " hardirq on events: %11u\n", hi1);
-        seq_printf(m, " hardirq off events: %11u\n", hi2);
-        seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
-        seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
-        seq_printf(m, " softirq on events: %11u\n", si1);
-        seq_printf(m, " softirq off events: %11u\n", si2);
-        seq_printf(m, " redundant softirq ons: %11u\n", sr1);
-        seq_printf(m, " redundant softirq offs: %11u\n", sr2);
+        seq_printf(m, " hardirq on events: %11llu\n", hi1);
+        seq_printf(m, " hardirq off events: %11llu\n", hi2);
+        seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
+        seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
+        seq_printf(m, " softirq on events: %11llu\n", si1);
+        seq_printf(m, " softirq off events: %11llu\n", si2);
+        seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
+        seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
 #endif
 }
 
@@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
 #endif
         }
 #ifdef CONFIG_DEBUG_LOCKDEP
-        DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+        DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
 #endif
         seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
                         nr_lock_classes, MAX_LOCKDEP_KEYS);