Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  79
1 files changed, 55 insertions, 24 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 91874a95060d..5af2842dea96 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 /* Missed count stored at end */
 #define RB_MISSED_STORED	(1 << 30)
 
+#define RB_MISSED_FLAGS		(RB_MISSED_EVENTS|RB_MISSED_STORED)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  */
 size_t ring_buffer_page_len(void *page)
 {
-	return local_read(&((struct buffer_data_page *)page)->commit)
+	struct buffer_data_page *bpage = page;
+
+	return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
 		+ BUF_PAGE_HDR_SIZE;
 }
 
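
The hunk above folds the missed-events flag bits out of the commit counter before it is used as a page length: the top bits of the commit word double as flags, so the raw value can overstate the length once a writer sets them. Below is a minimal userspace sketch of that masking, assuming RB_MISSED_EVENTS is the neighbouring bit 31 (its definition is just outside this hunk) and ignoring the local_t wrapper and the BUF_PAGE_HDR_SIZE offset.

#include <stdio.h>

/* Assumed bit positions; RB_MISSED_STORED matches the hunk, RB_MISSED_EVENTS is assumed */
#define RB_MISSED_EVENTS  (1UL << 31)
#define RB_MISSED_STORED  (1UL << 30)
#define RB_MISSED_FLAGS   (RB_MISSED_EVENTS | RB_MISSED_STORED)

int main(void)
{
        unsigned long commit = 4000;            /* bytes committed to the page */

        commit |= RB_MISSED_STORED;             /* writer records that events were missed */

        printf("raw commit:    %lu\n", commit);                         /* huge, not a length */
        printf("masked length: %lu\n", commit & ~RB_MISSED_FLAGS);      /* 4000 */
        return 0;
}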
@@ -1799,12 +1803,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
 
-static __always_inline void *
-__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
-{
-	return bpage->data + index;
-}
-
 static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
 {
 	return bpage->page->data + index;
@@ -2536,29 +2534,58 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * The lock and unlock are done within a preempt disable section.
  * The current_context per_cpu variable can only be modified
  * by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. There are four
- * different contexts that we need to consider.
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ *  bit 0 = NMI context
+ *  bit 1 = IRQ context
+ *  bit 2 = SoftIRQ context
+ *  bit 3 = normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
  *
- *  Normal context.
- *  SoftIRQ context
- *  IRQ context
- *  NMI context
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
  *
- * If for some reason the ring buffer starts to recurse, we
- * only allow that to happen at most 4 times (one for each
- * context). If it happens 5 times, then we consider this a
- * recusive loop and do not let it go further.
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
  */
 
 static __always_inline int
 trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	if (cpu_buffer->current_context >= 4)
+	unsigned int val = cpu_buffer->current_context;
+	unsigned long pc = preempt_count();
+	int bit;
+
+	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+		bit = RB_CTX_NORMAL;
+	else
+		bit = pc & NMI_MASK ? RB_CTX_NMI :
+			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
+
+	if (unlikely(val & (1 << bit)))
 		return 1;
 
-	cpu_buffer->current_context++;
-	/* Interrupts must see this update */
-	barrier();
+	val |= (1 << bit);
+	cpu_buffer->current_context = val;
 
 	return 0;
 }
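
The new trace_recursive_lock() derives the current context from preempt_count() and keeps one bit per context instead of a plain depth counter; recursion is only reported when the same context re-enters. Here is a userspace sketch of that check, with simplified stand-in values for preempt_count() and the NMI/HARDIRQ/SOFTIRQ masks (the real kernel values differ) and with the RB_CTX_* enum spelled out explicitly, since its definition is not part of this hunk.

#include <stdio.h>

enum { RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, RB_CTX_NORMAL };

/* Toy stand-ins for the kernel's preempt_count() flag masks */
#define NMI_MASK        0x4
#define HARDIRQ_MASK    0x2
#define SOFTIRQ_OFFSET  0x1

static unsigned int current_context;

static int recursive_lock(unsigned long pc)
{
        int bit;

        if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
                bit = RB_CTX_NORMAL;
        else
                bit = pc & NMI_MASK ? RB_CTX_NMI :
                        pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;

        if (current_context & (1 << bit))
                return 1;               /* same context re-entered: recursion */

        current_context |= 1 << bit;
        return 0;
}

int main(void)
{
        printf("%d\n", recursive_lock(0));              /* 0: normal context takes the lock */
        printf("%d\n", recursive_lock(HARDIRQ_MASK));   /* 0: an IRQ may nest on top of it */
        printf("%d\n", recursive_lock(HARDIRQ_MASK));   /* 1: IRQ context recursed */
        return 0;
}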
@@ -2566,9 +2593,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	/* Don't let the dec leak out */
-	barrier();
-	cpu_buffer->current_context--;
+	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
 }
 
 /**
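
The one-line unlock relies on x &= (x - 1) clearing the lowest set bit, and on the bit ordering in the comment above guaranteeing that the lowest set bit always belongs to the context that locked most recently (a preempting context always uses a lower bit). A small standalone demonstration of just that property:

#include <stdio.h>

int main(void)
{
        unsigned int ctx = 0;

        ctx |= 1 << 3;          /* normal context locks:       1000 */
        ctx |= 1 << 1;          /* IRQ preempts and locks:     1010 */

        ctx &= ctx - 1;         /* IRQ unlocks:   1010 & 1001 = 1000 */
        printf("after IRQ unlock:    %x\n", ctx);

        ctx &= ctx - 1;         /* normal unlocks: 1000 & 0111 = 0 */
        printf("after normal unlock: %x\n", ctx);
        return 0;
}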
@@ -4406,8 +4431,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct buffer_data_page *bpage = data;
+	struct page *page = virt_to_page(bpage);
 	unsigned long flags;
 
+	/* If the page is still in use someplace else, we can't reuse it */
+	if (page_ref_count(page) > 1)
+		goto out;
+
 	local_irq_save(flags);
 	arch_spin_lock(&cpu_buffer->lock);
 
@@ -4419,6 +4449,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 	arch_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
+ out:
 	free_page((unsigned long)bpage);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
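
With these two hunks, ring_buffer_free_read_page() refuses to recycle a read page that something else still references (page_ref_count() > 1, e.g. a page handed out via splice) and only otherwise considers parking it for reuse before freeing. The following is a rough userspace sketch of that reuse-or-free pattern, using an explicit reference counter and a single spare slot as stand-ins for the kernel's struct page refcount and the per-CPU spare-page slot that the elided context lines manage; none of these names are the kernel's.

#include <stdlib.h>

struct read_page {
        int     refcount;       /* stands in for page_ref_count() */
        char    data[4096];
};

static struct read_page *spare; /* stands in for the per-CPU spare-page slot */

static void free_read_page(struct read_page *p)
{
        /* Still referenced elsewhere: don't recycle it, just drop our reference */
        if (p->refcount > 1) {
                p->refcount--;
                return;
        }

        /* Assumed spare-slot logic: keep one page around for the next reader */
        if (!spare) {
                spare = p;
                return;
        }

        free(p);
}

int main(void)
{
        struct read_page *p = calloc(1, sizeof(*p));

        p->refcount = 1;
        free_read_page(p);      /* sole reference and no spare yet: page is parked */
        return spare != p;      /* 0 on the expected path */
}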