| author | Steven Rostedt <srostedt@redhat.com> | 2012-11-02 18:33:05 -0400 |
|---|---|---|
| committer | Steven Rostedt <rostedt@goodmis.org> | 2013-01-22 23:38:03 -0500 |
| commit | 567cd4da54ff45513d2ca1f0e3cb9ba45b66d6cf (patch) | |
| tree | 1a9e719a31643138fa76ecf556401fee1f536813 /kernel/trace/ring_buffer.c | |
| parent | 897f68a48b1f8fb6cb7493e1ee37e3ed7f879937 (diff) | |
ring-buffer: Use context bit recursion checking
Using context-bit recursion checking, we can increase the performance of the ring buffer.
Before this patch:
# echo function > /debug/tracing/current_tracer
# for i in `seq 10`; do ./hackbench 50; done
Time: 10.285
Time: 10.407
Time: 10.243
Time: 10.372
Time: 10.380
Time: 10.198
Time: 10.272
Time: 10.354
Time: 10.248
Time: 10.253
(average: 10.3012)
Now we have:
# echo function > /debug/tracing/current_tracer
# for i in `seq 10`; do ./hackbench 50; done
Time: 9.712
Time: 9.824
Time: 9.861
Time: 9.827
Time: 9.962
Time: 9.905
Time: 9.886
Time: 10.088
Time: 9.861
Time: 9.834
(average: 9.876)
a savings of about 4% ((10.3012 - 9.876) / 10.3012 ≈ 4.1%)!
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 85 |
1 file changed, 60 insertions(+), 25 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6ff9cc4658ed..481e26269281 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2432,41 +2432,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #ifdef CONFIG_TRACING
 
-#define TRACE_RECURSIVE_DEPTH 16
+/*
+ * The lock and unlock are done within a preempt disable section.
+ * The current_context per_cpu variable can only be modified
+ * by the current task between lock and unlock. But it can
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ *  bit 0 = NMI context
+ *  bit 1 = IRQ context
+ *  bit 2 = SoftIRQ context
+ *  bit 3 = normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
+ */
+static DEFINE_PER_CPU(unsigned int, current_context);
 
-/* Keep this code out of the fast path cache */
-static noinline void trace_recursive_fail(void)
+static __always_inline int trace_recursive_lock(void)
 {
-        /* Disable all tracing before we do anything else */
-        tracing_off_permanent();
-
-        printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
-                    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-                    trace_recursion_buffer(),
-                    hardirq_count() >> HARDIRQ_SHIFT,
-                    softirq_count() >> SOFTIRQ_SHIFT,
-                    in_nmi());
+        unsigned int val = this_cpu_read(current_context);
+        int bit;
 
-        WARN_ON_ONCE(1);
-}
-
-static inline int trace_recursive_lock(void)
-{
-        trace_recursion_inc();
+        if (in_interrupt()) {
+                if (in_nmi())
+                        bit = 0;
+                else if (in_irq())
+                        bit = 1;
+                else
+                        bit = 2;
+        } else
+                bit = 3;
 
-        if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
-                return 0;
+        if (unlikely(val & (1 << bit)))
+                return 1;
 
-        trace_recursive_fail();
+        val |= (1 << bit);
+        this_cpu_write(current_context, val);
 
-        return -1;
+        return 0;
 }
 
-static inline void trace_recursive_unlock(void)
+static __always_inline void trace_recursive_unlock(void)
 {
-        WARN_ON_ONCE(!trace_recursion_buffer());
+        unsigned int val = this_cpu_read(current_context);
 
-        trace_recursion_dec();
+        val--;
+        val &= this_cpu_read(current_context);
+        this_cpu_write(current_context, val);
 }
 
 #else
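To make the bit-clearing trick described in the comment concrete outside the kernel, here is a minimal user-space sketch. It is an illustration, not the kernel code: a plain global stands in for the per-CPU current_context variable, the context bit is passed in explicitly instead of being derived from in_nmi()/in_irq()/in_interrupt(), and the helper names recursive_lock()/recursive_unlock() are made up for this example.

```c
/*
 * User-space sketch of the context-bit recursion check.
 * NOT the kernel implementation: a plain global models the per-CPU
 * current_context, and the caller supplies the context bit directly.
 */
#include <assert.h>
#include <stdio.h>

enum { CTX_NMI = 0, CTX_IRQ = 1, CTX_SOFTIRQ = 2, CTX_NORMAL = 3 };

static unsigned int current_context;    /* models the per-CPU variable */

/* Returns 1 if this context is already tracing (recursion), 0 otherwise. */
static int recursive_lock(int bit)
{
        if (current_context & (1 << bit))
                return 1;
        current_context |= (1 << bit);
        return 0;
}

/* Clearing the least significant set bit undoes the most recent lock. */
static void recursive_unlock(void)
{
        current_context &= current_context - 1;
}

int main(void)
{
        /* Normal context starts tracing, then an IRQ interrupts it. */
        assert(recursive_lock(CTX_NORMAL) == 0);   /* 1000 */
        assert(recursive_lock(CTX_IRQ) == 0);      /* 1010 */

        /* The IRQ context recursing into itself is caught. */
        assert(recursive_lock(CTX_IRQ) == 1);

        /* Unlock clears the lowest set bit: 1010 & 1001 = 1000. */
        recursive_unlock();
        assert(current_context == 1 << CTX_NORMAL);

        recursive_unlock();                        /* 1000 & 0111 = 0000 */
        assert(current_context == 0);

        printf("bitmask recursion check behaves as described\n");
        return 0;
}
```

Because contexts only nest in a fixed order (NMI over IRQ over SoftIRQ over normal), the lowest set bit at unlock time is always the bit that the matching lock set, so nothing extra has to be passed from trace_recursive_lock() to trace_recursive_unlock().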