author	Frederic Weisbecker <fweisbec@gmail.com>	2010-04-05 18:10:17 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-04-05 18:15:37 -0400
commit	bd6d29c25bb1a24a4c160ec5de43e0004e01f72b (patch)
tree	0aa96c7e9fdfbe7dc9c7e40151aed928903240f0
parent	ced918eb748ce30b3aace549fd17540e40ffdca0 (diff)
lockstat: Make lockstat counting per cpu
Locking statistics are implemented using global atomic variables. This is
usually fine unless some path writes to them very often.

This is the case for the function and function graph tracers, which disable
irqs for each entry saved (except if the function tracer is in
preempt-disabled-only mode). And calls to local_irq_save/restore() increment
the hardirqs_on_events and hardirqs_off_events stats (or the similar stats
for the redundant versions).

Incrementing these global vars for each traced function ends up in too much
cache bouncing if lockstats are enabled.

To solve this, implement the debug_atomic_*() operations using per cpu vars.

-v2: Use per_cpu() instead of get_cpu_var() to fetch the desired cpu vars
     in debug_atomic_read()

-v3: Store the stats in a structure. No need for local_t as we are
     NMI/irq safe.

-v4: Fix tons of build errors. I thought I had tested it but I probably
     forgot to select the relevant config.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1270505417-8144-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
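To make the scheme concrete, here is a minimal standalone userspace C model of what the patch implements (assumptions: a fixed NR_CPUS_MODEL of 4 and the two illustrative counter fields below; this is a sketch of the idea, not the kernel code). Each "cpu" only ever increments its own slot, so the hot paths never bounce a shared cache line, and a reader rebuilds the global total by summing every slot, just as debug_atomic_read() does in the patch.

#include <stdio.h>

#define NR_CPUS_MODEL 4

/* Userspace stand-in for DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats). */
struct lockdep_stats_model {
	unsigned long hardirqs_on_events;
	unsigned long hardirqs_off_events;
};

static struct lockdep_stats_model stats[NR_CPUS_MODEL];

/* Write side: like debug_atomic_inc(), touches only the local cpu's slot. */
static void inc_hardirqs_on(int cpu)
{
	stats[cpu].hardirqs_on_events++;
}

/* Read side: like debug_atomic_read(), folds every cpu's slot together. */
static unsigned long long total_hardirqs_on(void)
{
	unsigned long long total = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		total += stats[cpu].hardirqs_on_events;
	return total;
}

int main(void)
{
	inc_hardirqs_on(0);
	inc_hardirqs_on(0);
	inc_hardirqs_on(3);
	printf("hardirqs_on_events: %llu\n", total_hardirqs_on());	/* prints 3 */
	return 0;
}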
-rw-r--r--	kernel/lockdep.c            47
-rw-r--r--	kernel/lockdep_internals.h  74
-rw-r--r--	kernel/lockdep_proc.c       58
3 files changed, 99 insertions(+), 80 deletions(-)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d0455de1..069af0276bf7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -430,20 +430,7 @@ static struct stack_trace lockdep_init_trace = {
 /*
  * Various lockdep statistics:
  */
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_backwards_checks;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
 #endif
 
 /*
@@ -758,7 +745,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		return NULL;
 	}
 	class = lock_classes + nr_lock_classes++;
-	debug_atomic_inc(&nr_unused_locks);
+	debug_atomic_inc(nr_unused_locks);
 	class->key = key;
 	class->name = lock->name;
 	class->subclass = subclass;
@@ -1215,7 +1202,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
 {
 	int result;
 
-	debug_atomic_inc(&nr_cyclic_checks);
+	debug_atomic_inc(nr_cyclic_checks);
 
 	result = __bfs_forwards(root, target, class_equal, target_entry);
 
@@ -1252,7 +1239,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
 {
 	int result;
 
-	debug_atomic_inc(&nr_find_usage_forwards_checks);
+	debug_atomic_inc(nr_find_usage_forwards_checks);
 
 	result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1275,7 +1262,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
 {
 	int result;
 
-	debug_atomic_inc(&nr_find_usage_backwards_checks);
+	debug_atomic_inc(nr_find_usage_backwards_checks);
 
 	result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1835,7 +1822,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	list_for_each_entry(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 cache_hit:
-			debug_atomic_inc(&chain_lookup_hits);
+			debug_atomic_inc(chain_lookup_hits);
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: "
 					"%016Lx tail class: [%p] %s\n",
@@ -1900,7 +1887,7 @@ cache_hit:
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
 	list_add_tail_rcu(&chain->entry, hash_head);
-	debug_atomic_inc(&chain_lookup_misses);
+	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();
 
 	return 1;
@@ -2321,7 +2308,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 		return;
 
 	if (unlikely(curr->hardirqs_enabled)) {
-		debug_atomic_inc(&redundant_hardirqs_on);
+		debug_atomic_inc(redundant_hardirqs_on);
 		return;
 	}
 	/* we'll do an OFF -> ON transition: */
@@ -2348,7 +2335,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 
 	curr->hardirq_enable_ip = ip;
 	curr->hardirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(&hardirqs_on_events);
+	debug_atomic_inc(hardirqs_on_events);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
@@ -2380,9 +2367,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
 		curr->hardirqs_enabled = 0;
 		curr->hardirq_disable_ip = ip;
 		curr->hardirq_disable_event = ++curr->irq_events;
-		debug_atomic_inc(&hardirqs_off_events);
+		debug_atomic_inc(hardirqs_off_events);
 	} else
-		debug_atomic_inc(&redundant_hardirqs_off);
+		debug_atomic_inc(redundant_hardirqs_off);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
@@ -2406,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip)
 		return;
 
 	if (curr->softirqs_enabled) {
-		debug_atomic_inc(&redundant_softirqs_on);
+		debug_atomic_inc(redundant_softirqs_on);
 		return;
 	}
 
@@ -2416,7 +2403,7 @@ void trace_softirqs_on(unsigned long ip)
 	curr->softirqs_enabled = 1;
 	curr->softirq_enable_ip = ip;
 	curr->softirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(&softirqs_on_events);
+	debug_atomic_inc(softirqs_on_events);
 	/*
 	 * We are going to turn softirqs on, so set the
 	 * usage bit for all held locks, if hardirqs are
@@ -2446,10 +2433,10 @@ void trace_softirqs_off(unsigned long ip)
 		curr->softirqs_enabled = 0;
 		curr->softirq_disable_ip = ip;
 		curr->softirq_disable_event = ++curr->irq_events;
-		debug_atomic_inc(&softirqs_off_events);
+		debug_atomic_inc(softirqs_off_events);
 		DEBUG_LOCKS_WARN_ON(!softirq_count());
 	} else
-		debug_atomic_inc(&redundant_softirqs_off);
+		debug_atomic_inc(redundant_softirqs_off);
 }
 
 static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2654,7 +2641,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 			return 0;
 		break;
 	case LOCK_USED:
-		debug_atomic_dec(&nr_unused_locks);
+		debug_atomic_dec(nr_unused_locks);
 		break;
 	default:
 		if (!debug_locks_off_graph_unlock())
@@ -2760,7 +2747,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	debug_atomic_inc((atomic_t *)&class->ops);
+	atomic_inc((atomic_t *)&class->ops);
 	if (very_verbose(class)) {
 		printk("\nacquire class [%p] %s", class->key, class->name);
 		if (class->name_version > 1)
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2ee95ad1313..8d7d4b6c741a 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -110,29 +110,61 @@ lockdep_count_backward_deps(struct lock_class *class)
 #endif
 
 #ifdef CONFIG_DEBUG_LOCKDEP
+
+#include <asm/local.h>
 /*
- * Various lockdep statistics:
+ * Various lockdep statistics.
+ * We want them per cpu as they are often accessed in fast path
+ * and we want to avoid too much cache bouncing.
  */
-extern atomic_t chain_lookup_hits;
-extern atomic_t chain_lookup_misses;
-extern atomic_t hardirqs_on_events;
-extern atomic_t hardirqs_off_events;
-extern atomic_t redundant_hardirqs_on;
-extern atomic_t redundant_hardirqs_off;
-extern atomic_t softirqs_on_events;
-extern atomic_t softirqs_off_events;
-extern atomic_t redundant_softirqs_on;
-extern atomic_t redundant_softirqs_off;
-extern atomic_t nr_unused_locks;
-extern atomic_t nr_cyclic_checks;
-extern atomic_t nr_cyclic_check_recursions;
-extern atomic_t nr_find_usage_forwards_checks;
-extern atomic_t nr_find_usage_forwards_recursions;
-extern atomic_t nr_find_usage_backwards_checks;
-extern atomic_t nr_find_usage_backwards_recursions;
-# define debug_atomic_inc(ptr)		atomic_inc(ptr)
-# define debug_atomic_dec(ptr)		atomic_dec(ptr)
-# define debug_atomic_read(ptr)		atomic_read(ptr)
+struct lockdep_stats {
+	int	chain_lookup_hits;
+	int	chain_lookup_misses;
+	int	hardirqs_on_events;
+	int	hardirqs_off_events;
+	int	redundant_hardirqs_on;
+	int	redundant_hardirqs_off;
+	int	softirqs_on_events;
+	int	softirqs_off_events;
+	int	redundant_softirqs_on;
+	int	redundant_softirqs_off;
+	int	nr_unused_locks;
+	int	nr_cyclic_checks;
+	int	nr_cyclic_check_recursions;
+	int	nr_find_usage_forwards_checks;
+	int	nr_find_usage_forwards_recursions;
+	int	nr_find_usage_backwards_checks;
+	int	nr_find_usage_backwards_recursions;
+};
+
+DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+
+#define debug_atomic_inc(ptr)			{		\
+	struct lockdep_stats *__cpu_lockdep_stats;		\
+								\
+	WARN_ON_ONCE(!irqs_disabled());				\
+	__cpu_lockdep_stats = &__get_cpu_var(lockdep_stats);	\
+	__cpu_lockdep_stats->ptr++;				\
+}
+
+#define debug_atomic_dec(ptr)			{		\
+	struct lockdep_stats *__cpu_lockdep_stats;		\
+								\
+	WARN_ON_ONCE(!irqs_disabled());				\
+	__cpu_lockdep_stats = &__get_cpu_var(lockdep_stats);	\
+	__cpu_lockdep_stats->ptr--;				\
+}
+
+#define debug_atomic_read(ptr)		({			\
+	struct lockdep_stats *__cpu_lockdep_stats;		\
+	unsigned long long __total = 0;				\
+	int __cpu;						\
+	for_each_possible_cpu(__cpu) {				\
+		__cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \
+		__total += __cpu_lockdep_stats->ptr;		\
+	}							\
+	__total;						\
+})
 #else
 # define debug_atomic_inc(ptr)		do { } while (0)
 # define debug_atomic_dec(ptr)		do { } while (0)
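One detail worth noting about the macros above: the argument named ptr is not a pointer at all, it is a struct field name that the preprocessor splices into the "->field++" expression. A standalone sketch of that same token-splicing pattern (hypothetical demo_stats type and demo_stat_inc() macro, plain C, no kernel headers):

#include <assert.h>

struct demo_stats {
	int chain_lookup_hits;
	int chain_lookup_misses;
};

/* 'field' is pasted in verbatim, exactly like 'ptr' in debug_atomic_inc(). */
#define demo_stat_inc(stats, field)	((stats)->field++)

int main(void)
{
	struct demo_stats s = { 0, 0 };

	demo_stat_inc(&s, chain_lookup_hits);	/* expands to (&s)->chain_lookup_hits++ */
	demo_stat_inc(&s, chain_lookup_hits);
	demo_stat_inc(&s, chain_lookup_misses);

	assert(s.chain_lookup_hits == 2);
	assert(s.chain_lookup_misses == 1);
	return 0;
}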
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d4aba4f3584c..59b76c8ce9d7 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = {
 static void lockdep_stats_debug_show(struct seq_file *m)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
-	unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
-		     hi2 = debug_atomic_read(&hardirqs_off_events),
-		     hr1 = debug_atomic_read(&redundant_hardirqs_on),
-		     hr2 = debug_atomic_read(&redundant_hardirqs_off),
-		     si1 = debug_atomic_read(&softirqs_on_events),
-		     si2 = debug_atomic_read(&softirqs_off_events),
-		     sr1 = debug_atomic_read(&redundant_softirqs_on),
-		     sr2 = debug_atomic_read(&redundant_softirqs_off);
+	unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
+			   hi2 = debug_atomic_read(hardirqs_off_events),
+			   hr1 = debug_atomic_read(redundant_hardirqs_on),
+			   hr2 = debug_atomic_read(redundant_hardirqs_off),
+			   si1 = debug_atomic_read(softirqs_on_events),
+			   si2 = debug_atomic_read(softirqs_off_events),
+			   sr1 = debug_atomic_read(redundant_softirqs_on),
+			   sr2 = debug_atomic_read(redundant_softirqs_off);
 
-	seq_printf(m, " chain lookup misses:           %11u\n",
-		debug_atomic_read(&chain_lookup_misses));
-	seq_printf(m, " chain lookup hits:             %11u\n",
-		debug_atomic_read(&chain_lookup_hits));
-	seq_printf(m, " cyclic checks:                 %11u\n",
-		debug_atomic_read(&nr_cyclic_checks));
-	seq_printf(m, " find-mask forwards checks:     %11u\n",
-		debug_atomic_read(&nr_find_usage_forwards_checks));
-	seq_printf(m, " find-mask backwards checks:    %11u\n",
-		debug_atomic_read(&nr_find_usage_backwards_checks));
+	seq_printf(m, " chain lookup misses:           %11llu\n",
+		debug_atomic_read(chain_lookup_misses));
+	seq_printf(m, " chain lookup hits:             %11llu\n",
+		debug_atomic_read(chain_lookup_hits));
+	seq_printf(m, " cyclic checks:                 %11llu\n",
+		debug_atomic_read(nr_cyclic_checks));
+	seq_printf(m, " find-mask forwards checks:     %11llu\n",
+		debug_atomic_read(nr_find_usage_forwards_checks));
+	seq_printf(m, " find-mask backwards checks:    %11llu\n",
+		debug_atomic_read(nr_find_usage_backwards_checks));
 
-	seq_printf(m, " hardirq on events:             %11u\n", hi1);
-	seq_printf(m, " hardirq off events:            %11u\n", hi2);
-	seq_printf(m, " redundant hardirq ons:         %11u\n", hr1);
-	seq_printf(m, " redundant hardirq offs:        %11u\n", hr2);
-	seq_printf(m, " softirq on events:             %11u\n", si1);
-	seq_printf(m, " softirq off events:            %11u\n", si2);
-	seq_printf(m, " redundant softirq ons:         %11u\n", sr1);
-	seq_printf(m, " redundant softirq offs:        %11u\n", sr2);
+	seq_printf(m, " hardirq on events:             %11llu\n", hi1);
+	seq_printf(m, " hardirq off events:            %11llu\n", hi2);
+	seq_printf(m, " redundant hardirq ons:         %11llu\n", hr1);
+	seq_printf(m, " redundant hardirq offs:        %11llu\n", hr2);
+	seq_printf(m, " softirq on events:             %11llu\n", si1);
+	seq_printf(m, " softirq off events:            %11llu\n", si2);
+	seq_printf(m, " redundant softirq ons:         %11llu\n", sr1);
+	seq_printf(m, " redundant softirq offs:        %11llu\n", sr2);
 #endif
 }
 
@@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
 #endif
 	}
 #ifdef CONFIG_DEBUG_LOCKDEP
-	DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+	DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
 #endif
 	seq_printf(m, " lock-classes:                  %11lu [max: %lu]\n",
 			nr_lock_classes, MAX_LOCKDEP_KEYS);
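On the proc side, the format strings move from %11u to %11llu because debug_atomic_read() now returns an unsigned long long built by summing the per-cpu int counters. A tiny standalone reminder of why the printf format has to widen with the type (hypothetical array and names, plain C):

#include <stdio.h>

#define NR_CPUS_MODEL 4

/* Per-"cpu" int counters, like the fields of struct lockdep_stats. */
static int hardirqs_on_events[NR_CPUS_MODEL] = { 3, 1, 0, 2 };

int main(void)
{
	unsigned long long total = 0;	/* wider than the per-cpu int slots */
	int cpu;

	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		total += hardirqs_on_events[cpu];

	/* %llu matches unsigned long long; keeping %u here would trip -Wformat. */
	printf(" hardirq on events: %11llu\n", total);
	return 0;
}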