Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c | 179
1 file changed, 137 insertions(+), 42 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b979426d16c6..6989df2ba194 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
+#include <linux/ftrace_event.h>
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
+#include <linux/trace_seq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
@@ -21,7 +23,6 @@
 #include <linux/fs.h>
 
 #include <asm/local.h>
-#include "trace.h"
 
 static void update_pages_handler(struct work_struct *work);
 
@@ -177,7 +178,7 @@ void tracing_off_permanent(void)
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
-#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
 # define RB_FORCE_8BYTE_ALIGNMENT	0
 # define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
 #else
@@ -185,6 +186,8 @@ void tracing_off_permanent(void)
 # define RB_ARCH_ALIGNMENT		8U
 #endif
 
+#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA	0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -333,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
-	unsigned char	 data[];	/* data of buffer page */
+	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
 };
 
 /*
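
The two hunks above force the data[] payload onto an 8-byte boundary on
architectures that require aligned 64-bit accesses. A minimal userspace
sketch of the same effect follows; the _demo names stand in for the kernel
macros, and the real RB_ARCH_ALIGNMENT choice depends on
CONFIG_HAVE_64BIT_ALIGNED_ACCESS rather than the fixed 8 used here.

/* Sketch only: __attribute__((aligned(8))) plays the role of the
 * kernel's __aligned(RB_ARCH_ALIGNMENT) wrapper. */
#include <stdio.h>
#include <stddef.h>

#define RB_ALIGN_DATA_DEMO __attribute__((aligned(8)))

struct data_page_demo {
	unsigned long long time_stamp;	/* 8 bytes, like u64 */
	long		   commit;	/* 4 bytes on 32-bit, like local_t */
	unsigned char	   data[] RB_ALIGN_DATA_DEMO;
};

int main(void)
{
	/* On a 32-bit build the attribute pads data[] out to offset 16
	 * instead of 12, so 8-byte event fields stay naturally aligned. */
	printf("data[] starts at offset %zu\n",
	       offsetof(struct data_page_demo, data));
	return 0;
}
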
@@ -460,9 +463,10 @@ struct ring_buffer_per_cpu {
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
 	local_t				entries_bytes;
-	local_t				commit_overrun;
-	local_t				overrun;
 	local_t				entries;
+	local_t				overrun;
+	local_t				commit_overrun;
+	local_t				dropped_events;
 	local_t				committing;
 	local_t				commits;
 	unsigned long			read;
@@ -1396,6 +1400,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 		struct list_head *head_page_with_bit;
 
 		head_page = &rb_set_head_page(cpu_buffer)->list;
+		if (!head_page)
+			break;
 		prev_page = head_page->prev;
 
 		first_page = pages->next;
@@ -1820,7 +1826,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
 }
 
 /**
- * ring_buffer_update_event - update event type and data
+ * rb_update_event - update event type and data
  * @event: the even to update
  * @type: the type of event
  * @length: the size of the event field in the ring buffer
@@ -2155,8 +2161,10 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		 * If we are not in overwrite mode,
 		 * this is easy, just stop here.
 		 */
-		if (!(buffer->flags & RB_FL_OVERWRITE))
+		if (!(buffer->flags & RB_FL_OVERWRITE)) {
+			local_inc(&cpu_buffer->dropped_events);
 			goto out_reset;
+		}
 
 		ret = rb_handle_head_page(cpu_buffer,
 					  tail_page,
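
The new dropped_events counter gives non-overwrite mode a visible drop
count: when the buffer is full and RB_FL_OVERWRITE is off, the write is
abandoned and the counter is bumped. A toy fixed-size queue showing the
same policy (names are illustrative, not the kernel structures):

#include <stdio.h>

#define QSIZE 4	/* one slot stays empty, so capacity is 3 */

struct toy_rb {
	int buf[QSIZE];
	unsigned int head, tail;	/* tail == next write slot */
	unsigned long dropped_events;
};

static int toy_rb_write(struct toy_rb *rb, int val)
{
	unsigned int next = (rb->tail + 1) % QSIZE;

	if (next == rb->head) {		/* full and not overwriting */
		rb->dropped_events++;	/* mirrors the goto out_reset path */
		return -1;
	}
	rb->buf[rb->tail] = val;
	rb->tail = next;
	return 0;
}

int main(void)
{
	struct toy_rb rb = { .head = 0, .tail = 0, .dropped_events = 0 };

	for (int i = 0; i < 6; i++)
		toy_rb_write(&rb, i);
	printf("dropped: %lu\n", rb.dropped_events);	/* prints 3 */
	return 0;
}
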
@@ -2427,41 +2435,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #ifdef CONFIG_TRACING
 
-#define TRACE_RECURSIVE_DEPTH 16
+/*
+ * The lock and unlock are done within a preempt disable section.
+ * The current_context per_cpu variable can only be modified
+ * by the current task between lock and unlock. But it can
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ *  bit 0 =  NMI context
+ *  bit 1 =  IRQ context
+ *  bit 2 =  SoftIRQ context
+ *  bit 3 =  normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
+ */
+static DEFINE_PER_CPU(unsigned int, current_context);
 
-/* Keep this code out of the fast path cache */
-static noinline void trace_recursive_fail(void)
+static __always_inline int trace_recursive_lock(void)
 {
-	/* Disable all tracing before we do anything else */
-	tracing_off_permanent();
+	unsigned int val = this_cpu_read(current_context);
+	int bit;
 
-	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
-		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-		    trace_recursion_buffer(),
-		    hardirq_count() >> HARDIRQ_SHIFT,
-		    softirq_count() >> SOFTIRQ_SHIFT,
-		    in_nmi());
-
-	WARN_ON_ONCE(1);
-}
-
-static inline int trace_recursive_lock(void)
-{
-	trace_recursion_inc();
+	if (in_interrupt()) {
+		if (in_nmi())
+			bit = 0;
+		else if (in_irq())
+			bit = 1;
+		else
+			bit = 2;
+	} else
+		bit = 3;
 
-	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
-		return 0;
+	if (unlikely(val & (1 << bit)))
+		return 1;
 
-	trace_recursive_fail();
+	val |= (1 << bit);
+	this_cpu_write(current_context, val);
 
-	return -1;
+	return 0;
 }
 
-static inline void trace_recursive_unlock(void)
+static __always_inline void trace_recursive_unlock(void)
 {
-	WARN_ON_ONCE(!trace_recursion_buffer());
+	unsigned int val = this_cpu_read(current_context);
 
-	trace_recursion_dec();
+	val--;
+	val &= this_cpu_read(current_context);
+	this_cpu_write(current_context, val);
 }
 
 #else
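
The subtract-and-AND trick described in the comment is easy to try in
isolation. A userspace sketch, with an ordinary global standing in for
the per-CPU current_context variable:

#include <assert.h>
#include <stdio.h>

enum { NMI_BIT, IRQ_BIT, SOFTIRQ_BIT, NORMAL_BIT };

static unsigned int ctx;	/* stand-in for per-CPU current_context */

static int lock_ctx(int bit)
{
	if (ctx & (1u << bit))
		return 1;	/* recursion within the same context */
	ctx |= 1u << bit;
	return 0;
}

static void unlock_ctx(void)
{
	/* Higher contexts only interrupt lower bits, so the lowest set
	 * bit is always the context entered last; subtract-and-AND
	 * clears exactly that bit. */
	ctx = (ctx - 1) & ctx;
}

int main(void)
{
	assert(lock_ctx(NORMAL_BIT) == 0);	/* ctx = 1000b */
	assert(lock_ctx(IRQ_BIT) == 0);		/* IRQ arrives: ctx = 1010b */
	assert(lock_ctx(IRQ_BIT) == 1);		/* same context recurses */
	unlock_ctx();				/* clears bit 1: ctx = 1000b */
	unlock_ctx();				/* clears bit 3: ctx = 0 */
	printf("ctx = %u\n", ctx);
	return 0;
}
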
@@ -2720,8 +2763,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
  * and not the length of the event which would hold the header.
  */
 int ring_buffer_write(struct ring_buffer *buffer,
-			unsigned long length,
-			void *data)
+		      unsigned long length,
+		      void *data)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
@@ -2929,12 +2972,12 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to read from.
  */
-unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 {
 	unsigned long flags;
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct buffer_page *bpage;
-	unsigned long ret;
+	u64 ret = 0;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
@@ -2949,7 +2992,8 @@ unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 		bpage = cpu_buffer->reader_page;
 	else
 		bpage = rb_set_head_page(cpu_buffer);
-	ret = bpage->page->time_stamp;
+	if (bpage)
+		ret = bpage->page->time_stamp;
 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return ret;
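
The return-type change above matters on 32-bit targets, where unsigned
long is 32 bits and would silently truncate the 64-bit timestamp. A
quick userspace illustration using stdint types:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ts = 0x100000000ULL + 42;	/* > 32 bits of nanoseconds */
	uint32_t truncated = (uint32_t)ts;	/* what a 32-bit long keeps */

	printf("u64 return:  %llu\n", (unsigned long long)ts);
	printf("long return: %u\n", (unsigned)truncated);	/* 42: high bits lost */
	return 0;
}
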
@@ -2995,7 +3039,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
 
 /**
- * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
+ *    buffer wrapping around (only if RB_FL_OVERWRITE is on).
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the number of overruns from
  */
@@ -3015,7 +3060,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
- * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
+ *    commits failing due to the buffer wrapping around while there are uncommitted
+ *    events, such as during an interrupt storm.
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the number of overruns from
  */
@@ -3036,6 +3083,46 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
 
 /**
+ * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
+ *    the ring buffer filling up (only if RB_FL_OVERWRITE is off).
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = local_read(&cpu_buffer->dropped_events);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
+
+/**
+ * ring_buffer_read_events_cpu - get the number of events successfully read
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of events read
+ */
+unsigned long
+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->read;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
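
As a sketch of how a consumer might use the accessors this patch adds,
the following sums the per-CPU counters across online CPUs;
report_rb_stats() is a hypothetical caller, not a function added here:

#include <linux/ring_buffer.h>
#include <linux/cpumask.h>
#include <linux/printk.h>

static void report_rb_stats(struct ring_buffer *buffer)
{
	unsigned long dropped = 0, read = 0, overrun = 0;
	int cpu;

	for_each_online_cpu(cpu) {
		dropped += ring_buffer_dropped_events_cpu(buffer, cpu);
		read	+= ring_buffer_read_events_cpu(buffer, cpu);
		overrun += ring_buffer_overrun_cpu(buffer, cpu);
	}
	pr_info("rb stats: read=%lu overrun=%lu dropped=%lu\n",
		read, overrun, dropped);
}
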
@@ -3260,6 +3347,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 * Splice the empty reader page into the list around the head.
 	 */
 	reader = rb_set_head_page(cpu_buffer);
+	if (!reader)
+		goto out;
 	cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
 
@@ -3392,7 +3481,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	/* check for end of page padding */
 	if ((iter->head >= rb_page_size(iter->head_page)) &&
 	    (iter->head_page != cpu_buffer->commit_page))
-		rb_advance_iter(iter);
+		rb_inc_iter(iter);
 }
 
 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
@@ -3778,12 +3867,17 @@ void
 ring_buffer_read_finish(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+	unsigned long flags;
 
 	/*
 	 * Ring buffer is disabled from recording, here's a good place
 	 * to check the integrity of the ring buffer.
+	 * Must prevent readers from trying to read, as the check
+	 * clears the HEAD page and readers require it.
 	 */
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	rb_check_pages(cpu_buffer);
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	atomic_dec(&cpu_buffer->record_disabled);
 	atomic_dec(&cpu_buffer->buffer->resize_disabled);
@@ -3864,9 +3958,10 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
-	local_set(&cpu_buffer->commit_overrun, 0);
 	local_set(&cpu_buffer->entries_bytes, 0);
 	local_set(&cpu_buffer->overrun, 0);
+	local_set(&cpu_buffer->commit_overrun, 0);
+	local_set(&cpu_buffer->dropped_events, 0);
 	local_set(&cpu_buffer->entries, 0);
 	local_set(&cpu_buffer->committing, 0);
 	local_set(&cpu_buffer->commits, 0);