path: root/kernel/trace/ring_buffer.c
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  219
1 file changed, 180 insertions, 39 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8c1b2d290718..7f6059c5aa94 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22 23
24#include <asm/local.h>
23#include "trace.h" 25#include "trace.h"
24 26
25/* 27/*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
207#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
208 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
211 221
@@ -309,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
309#define TS_MASK ((1ULL << TS_SHIFT) - 1) 319#define TS_MASK ((1ULL << TS_SHIFT) - 1)
310#define TS_DELTA_TEST (~TS_MASK) 320#define TS_DELTA_TEST (~TS_MASK)
311 321
322/* Flag when events were overwritten */
323#define RB_MISSED_EVENTS (1 << 31)
324/* Missed count stored at end */
325#define RB_MISSED_STORED (1 << 30)
326
312struct buffer_data_page { 327struct buffer_data_page {
313 u64 time_stamp; /* page time stamp */ 328 u64 time_stamp; /* page time stamp */
314 local_t commit; /* write committed index */ 329 local_t commit; /* write committed index */
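
The two new flag bits sit at the top of the reader-visible commit word; the committed data length lives in the low bits and can never reach bit 30, since a sub-buffer is at most one page. Below is a minimal userspace-style decode sketch, assuming the reader was built with the same word size as the kernel; data_page_header, RB_COMMIT_MASK and page_lost_events are illustrative names for this sketch (the real layout is struct buffer_data_page above, exported to tools via the header_page format), not kernel API.

#include <stdint.h>
#include <string.h>

#define RB_MISSED_EVENTS  (1UL << 31)		/* mirrors the patch */
#define RB_MISSED_STORED  (1UL << 30)
#define RB_COMMIT_MASK    ((1UL << 30) - 1)	/* low bits: committed length */

/* Header layout mirroring buffer_data_page (u64 timestamp, long commit). */
struct data_page_header {
	uint64_t	time_stamp;
	long		commit;
	unsigned char	data[];
};

/* Return the number of events lost before this page, (unsigned long)-1 if
 * events were lost but the count did not fit on the page, 0 otherwise. */
static unsigned long page_lost_events(const void *page)
{
	const struct data_page_header *hdr = page;
	unsigned long commit = (unsigned long)hdr->commit;
	unsigned long lost = 0;

	if (commit & RB_MISSED_EVENTS) {
		if (commit & RB_MISSED_STORED)
			/* the count was appended right after the data */
			memcpy(&lost, hdr->data + (commit & RB_COMMIT_MASK),
			       sizeof(lost));
		else
			lost = (unsigned long)-1;	/* lost, count unknown */
	}
	return lost;
}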
@@ -328,6 +343,7 @@ struct buffer_page {
328 local_t write; /* index for next write */ 343 local_t write; /* index for next write */
329 unsigned read; /* index for next read */ 344 unsigned read; /* index for next read */
330 local_t entries; /* entries on this page */ 345 local_t entries; /* entries on this page */
346 unsigned long real_end; /* real end of data */
331 struct buffer_data_page *page; /* Actual data page */ 347 struct buffer_data_page *page; /* Actual data page */
332}; 348};
333 349
@@ -407,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
407 (unsigned int)sizeof(field.commit), 423 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long)); 424 (unsigned int)is_signed_type(long));
409 425
426 ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
427 "offset:%u;\tsize:%u;\tsigned:%u;\n",
428 (unsigned int)offsetof(typeof(field), commit),
429 1,
430 (unsigned int)is_signed_type(long));
431
410 ret = trace_seq_printf(s, "\tfield: char data;\t" 432 ret = trace_seq_printf(s, "\tfield: char data;\t"
411 "offset:%u;\tsize:%u;\tsigned:%u;\n", 433 "offset:%u;\tsize:%u;\tsigned:%u;\n",
412 (unsigned int)offsetof(typeof(field), data), 434 (unsigned int)offsetof(typeof(field), data),
@@ -430,6 +452,8 @@ struct ring_buffer_per_cpu {
430 struct buffer_page *tail_page; /* write to tail */ 452 struct buffer_page *tail_page; /* write to tail */
431 struct buffer_page *commit_page; /* committed pages */ 453 struct buffer_page *commit_page; /* committed pages */
432 struct buffer_page *reader_page; 454 struct buffer_page *reader_page;
455 unsigned long lost_events;
456 unsigned long last_overrun;
433 local_t commit_overrun; 457 local_t commit_overrun;
434 local_t overrun; 458 local_t overrun;
435 local_t entries; 459 local_t entries;
@@ -1200,18 +1224,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1200 1224
1201 for (i = 0; i < nr_pages; i++) { 1225 for (i = 0; i < nr_pages; i++) {
1202 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1226 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1203 return; 1227 goto out;
1204 p = cpu_buffer->pages->next; 1228 p = cpu_buffer->pages->next;
1205 bpage = list_entry(p, struct buffer_page, list); 1229 bpage = list_entry(p, struct buffer_page, list);
1206 list_del_init(&bpage->list); 1230 list_del_init(&bpage->list);
1207 free_buffer_page(bpage); 1231 free_buffer_page(bpage);
1208 } 1232 }
1209 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1233 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1210 return; 1234 goto out;
1211 1235
1212 rb_reset_cpu(cpu_buffer); 1236 rb_reset_cpu(cpu_buffer);
1213 rb_check_pages(cpu_buffer); 1237 rb_check_pages(cpu_buffer);
1214 1238
1239out:
1215 spin_unlock_irq(&cpu_buffer->reader_lock); 1240 spin_unlock_irq(&cpu_buffer->reader_lock);
1216} 1241}
1217 1242
@@ -1228,7 +1253,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1228 1253
1229 for (i = 0; i < nr_pages; i++) { 1254 for (i = 0; i < nr_pages; i++) {
1230 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1255 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1231 return; 1256 goto out;
1232 p = pages->next; 1257 p = pages->next;
1233 bpage = list_entry(p, struct buffer_page, list); 1258 bpage = list_entry(p, struct buffer_page, list);
1234 list_del_init(&bpage->list); 1259 list_del_init(&bpage->list);
@@ -1237,6 +1262,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1237 rb_reset_cpu(cpu_buffer); 1262 rb_reset_cpu(cpu_buffer);
1238 rb_check_pages(cpu_buffer); 1263 rb_check_pages(cpu_buffer);
1239 1264
1265out:
1240 spin_unlock_irq(&cpu_buffer->reader_lock); 1266 spin_unlock_irq(&cpu_buffer->reader_lock);
1241} 1267}
1242 1268
@@ -1546,7 +1572,7 @@ rb_update_event(struct ring_buffer_event *event,
1546 1572
1547 case 0: 1573 case 0:
1548 length -= RB_EVNT_HDR_SIZE; 1574 length -= RB_EVNT_HDR_SIZE;
1549 if (length > RB_MAX_SMALL_DATA) 1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1550 event->array[0] = length; 1576 event->array[0] = length;
1551 else 1577 else
1552 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1721,11 +1747,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1721 if (!length) 1747 if (!length)
1722 length = 1; 1748 length = 1;
1723 1749
1724 if (length > RB_MAX_SMALL_DATA) 1750 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1725 length += sizeof(event.array[0]); 1751 length += sizeof(event.array[0]);
1726 1752
1727 length += RB_EVNT_HDR_SIZE; 1753 length += RB_EVNT_HDR_SIZE;
1728 length = ALIGN(length, RB_ALIGNMENT); 1754 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1729 1755
1730 return length; 1756 return length;
1731} 1757}
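
The effect of RB_FORCE_8BYTE_ALIGNMENT and RB_ARCH_ALIGNMENT on event sizing can be seen in isolation. The demo below is a standalone userspace sketch, not kernel code: the constants are mirrored from this file (type_len can describe at most 28 four-byte words), demo_event_length is a hypothetical stand-in for rb_calculate_event_length, and it hard-codes the forced-alignment case the patch targets (64-bit without efficient unaligned access).

#include <stdio.h>

/* Values mirrored from the kernel source for this demo. */
#define RB_ALIGNMENT		4U
#define RB_EVNT_HDR_SIZE	4U	/* type_len:5, time_delta:27 */
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * 28U)
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

/* Pretend we are on a 64-bit arch *without* efficient unaligned access,
 * i.e. the case the patch is fixing. */
#define RB_FORCE_8BYTE_ALIGNMENT	1
#define RB_ARCH_ALIGNMENT		8U

static unsigned int demo_event_length(unsigned int length)
{
	if (!length)
		length = 1;

	/* With forced 8-byte alignment the length always goes into
	 * event->array[0] instead of being packed into type_len. */
	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
		length += 4;	/* sizeof(event->array[0]) */

	length += RB_EVNT_HDR_SIZE;
	return ALIGN(length, RB_ARCH_ALIGNMENT);
}

int main(void)
{
	/* A 6-byte payload now reserves 16 bytes (was 12 with 4-byte
	 * alignment), keeping 8-byte fields inside events aligned. */
	printf("%u\n", demo_event_length(6));
	return 0;
}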
@@ -1750,6 +1776,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1750 kmemcheck_annotate_bitfield(event, bitfield); 1776 kmemcheck_annotate_bitfield(event, bitfield);
1751 1777
1752 /* 1778 /*
1779 * Save the original length to the meta data.
1780 * This will be used by the reader to add lost event
1781 * counter.
1782 */
1783 tail_page->real_end = tail;
1784
1785 /*
1753 * If this event is bigger than the minimum size, then 1786 * If this event is bigger than the minimum size, then
1754 * we need to be careful that we don't subtract the 1787 * we need to be careful that we don't subtract the
1755 * write counter enough to allow another writer to slip 1788 * write counter enough to allow another writer to slip
@@ -1967,17 +2000,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1967 u64 *ts, u64 *delta) 2000 u64 *ts, u64 *delta)
1968{ 2001{
1969 struct ring_buffer_event *event; 2002 struct ring_buffer_event *event;
1970 static int once;
1971 int ret; 2003 int ret;
1972 2004
1973 if (unlikely(*delta > (1ULL << 59) && !once++)) { 2005 WARN_ONCE(*delta > (1ULL << 59),
1974 printk(KERN_WARNING "Delta way too big! %llu" 2006 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
1975 " ts=%llu write stamp = %llu\n", 2007 (unsigned long long)*delta,
1976 (unsigned long long)*delta, 2008 (unsigned long long)*ts,
1977 (unsigned long long)*ts, 2009 (unsigned long long)cpu_buffer->write_stamp);
1978 (unsigned long long)cpu_buffer->write_stamp);
1979 WARN_ON(1);
1980 }
1981 2010
1982 /* 2011 /*
1983 * The delta is too big, we to add a 2012 * The delta is too big, we to add a
@@ -2232,12 +2261,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2232 if (ring_buffer_flags != RB_BUFFERS_ON) 2261 if (ring_buffer_flags != RB_BUFFERS_ON)
2233 return NULL; 2262 return NULL;
2234 2263
2235 if (atomic_read(&buffer->record_disabled))
2236 return NULL;
2237
2238 /* If we are tracing schedule, we don't want to recurse */ 2264 /* If we are tracing schedule, we don't want to recurse */
2239 resched = ftrace_preempt_disable(); 2265 resched = ftrace_preempt_disable();
2240 2266
2267 if (atomic_read(&buffer->record_disabled))
2268 goto out_nocheck;
2269
2241 if (trace_recursive_lock()) 2270 if (trace_recursive_lock())
2242 goto out_nocheck; 2271 goto out_nocheck;
2243 2272
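
Moving the record_disabled test below ftrace_preempt_disable() matters because buffer reset and resize rely on disabling the buffer and then waiting a scheduler RCU grace period; if the test ran with preemption still enabled, a writer could pass it, be preempted, and write after the grace period. A hypothetical helper sketching the pattern this protects (example_quiesce_writers is not part of the patch; the two ring_buffer_record_* calls and synchronize_sched() are existing APIs):

#include <linux/ring_buffer.h>
#include <linux/rcupdate.h>

/* Illustrative only: with the record_disabled test now inside the
 * preempt-disabled region, one synchronize_sched() after disabling the
 * buffer is enough to know no writer is still mid reserve/commit. */
static void example_quiesce_writers(struct ring_buffer *buffer)
{
	ring_buffer_record_disable(buffer);
	synchronize_sched();	/* wait out writers that raced with the disable */
	/* ... now safe to reset or resize the buffer ... */
	ring_buffer_record_enable(buffer);
}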
@@ -2469,11 +2498,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2469 if (ring_buffer_flags != RB_BUFFERS_ON) 2498 if (ring_buffer_flags != RB_BUFFERS_ON)
2470 return -EBUSY; 2499 return -EBUSY;
2471 2500
2472 if (atomic_read(&buffer->record_disabled))
2473 return -EBUSY;
2474
2475 resched = ftrace_preempt_disable(); 2501 resched = ftrace_preempt_disable();
2476 2502
2503 if (atomic_read(&buffer->record_disabled))
2504 goto out;
2505
2477 cpu = raw_smp_processor_id(); 2506 cpu = raw_smp_processor_id();
2478 2507
2479 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2508 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2541,7 +2570,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2541 * @buffer: The ring buffer to enable writes 2570 * @buffer: The ring buffer to enable writes
2542 * 2571 *
2543 * Note, multiple disables will need the same number of enables 2572 * Note, multiple disables will need the same number of enables
2544 * to truely enable the writing (much like preempt_disable). 2573 * to truly enable the writing (much like preempt_disable).
2545 */ 2574 */
2546void ring_buffer_record_enable(struct ring_buffer *buffer) 2575void ring_buffer_record_enable(struct ring_buffer *buffer)
2547{ 2576{
@@ -2577,7 +2606,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2577 * @cpu: The CPU to enable. 2606 * @cpu: The CPU to enable.
2578 * 2607 *
2579 * Note, multiple disables will need the same number of enables 2608 * Note, multiple disables will need the same number of enables
2580 * to truely enable the writing (much like preempt_disable). 2609 * to truly enable the writing (much like preempt_disable).
2581 */ 2610 */
2582void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2611void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2583{ 2612{
@@ -2826,6 +2855,7 @@ static struct buffer_page *
2826rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 2855rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2827{ 2856{
2828 struct buffer_page *reader = NULL; 2857 struct buffer_page *reader = NULL;
2858 unsigned long overwrite;
2829 unsigned long flags; 2859 unsigned long flags;
2830 int nr_loops = 0; 2860 int nr_loops = 0;
2831 int ret; 2861 int ret;
@@ -2867,6 +2897,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2867 local_set(&cpu_buffer->reader_page->write, 0); 2897 local_set(&cpu_buffer->reader_page->write, 0);
2868 local_set(&cpu_buffer->reader_page->entries, 0); 2898 local_set(&cpu_buffer->reader_page->entries, 0);
2869 local_set(&cpu_buffer->reader_page->page->commit, 0); 2899 local_set(&cpu_buffer->reader_page->page->commit, 0);
2900 cpu_buffer->reader_page->real_end = 0;
2870 2901
2871 spin: 2902 spin:
2872 /* 2903 /*
@@ -2887,6 +2918,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2887 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); 2918 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2888 2919
2889 /* 2920 /*
2921 * We want to make sure we read the overruns after we set up our
2922 * pointers to the next object. The writer side does a
2923 * cmpxchg to cross pages which acts as the mb on the writer
2924 * side. Note, the reader will constantly fail the swap
2925 * while the writer is updating the pointers, so this
2926 * guarantees that the overwrite recorded here is the one we
2927 * want to compare with the last_overrun.
2928 */
2929 smp_mb();
2930 overwrite = local_read(&(cpu_buffer->overrun));
2931
2932 /*
2890 * Here's the tricky part. 2933 * Here's the tricky part.
2891 * 2934 *
2892 * We need to move the pointer past the header page. 2935 * We need to move the pointer past the header page.
@@ -2917,6 +2960,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2917 cpu_buffer->reader_page = reader; 2960 cpu_buffer->reader_page = reader;
2918 rb_reset_reader_page(cpu_buffer); 2961 rb_reset_reader_page(cpu_buffer);
2919 2962
2963 if (overwrite != cpu_buffer->last_overrun) {
2964 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
2965 cpu_buffer->last_overrun = overwrite;
2966 }
2967
2920 goto again; 2968 goto again;
2921 2969
2922 out: 2970 out:
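
The lost-event count is simply how far the writer's overrun counter moved since the reader's previous page swap. A trivial standalone illustration of that arithmetic, with made-up values:

#include <stdio.h>

int main(void)
{
	unsigned long last_overrun = 100;	/* snapshot kept at the previous swap */
	unsigned long overwrite = 103;		/* cpu_buffer->overrun read just now */

	/* Unsigned subtraction stays correct even across counter wrap. */
	printf("lost %lu events\n", overwrite - last_overrun);	/* prints 3 */
	return 0;
}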
@@ -2993,8 +3041,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2993 rb_advance_iter(iter); 3041 rb_advance_iter(iter);
2994} 3042}
2995 3043
3044static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3045{
3046 return cpu_buffer->lost_events;
3047}
3048
2996static struct ring_buffer_event * 3049static struct ring_buffer_event *
2997rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) 3050rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3051 unsigned long *lost_events)
2998{ 3052{
2999 struct ring_buffer_event *event; 3053 struct ring_buffer_event *event;
3000 struct buffer_page *reader; 3054 struct buffer_page *reader;
@@ -3046,6 +3100,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
3046 ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 3100 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3047 cpu_buffer->cpu, ts); 3101 cpu_buffer->cpu, ts);
3048 } 3102 }
3103 if (lost_events)
3104 *lost_events = rb_lost_events(cpu_buffer);
3049 return event; 3105 return event;
3050 3106
3051 default: 3107 default:
@@ -3156,12 +3212,14 @@ static inline int rb_ok_to_lock(void)
3156 * @buffer: The ring buffer to read 3212 * @buffer: The ring buffer to read
3157 * @cpu: The cpu to peak at 3213 * @cpu: The cpu to peak at
3158 * @ts: The timestamp counter of this event. 3214 * @ts: The timestamp counter of this event.
3215 * @lost_events: a variable to store if events were lost (may be NULL)
3159 * 3216 *
3160 * This will return the event that will be read next, but does 3217 * This will return the event that will be read next, but does
3161 * not consume the data. 3218 * not consume the data.
3162 */ 3219 */
3163struct ring_buffer_event * 3220struct ring_buffer_event *
3164ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3221ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3222 unsigned long *lost_events)
3165{ 3223{
3166 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 3224 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3167 struct ring_buffer_event *event; 3225 struct ring_buffer_event *event;
@@ -3176,7 +3234,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3176 local_irq_save(flags); 3234 local_irq_save(flags);
3177 if (dolock) 3235 if (dolock)
3178 spin_lock(&cpu_buffer->reader_lock); 3236 spin_lock(&cpu_buffer->reader_lock);
3179 event = rb_buffer_peek(cpu_buffer, ts); 3237 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3180 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3238 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3181 rb_advance_reader(cpu_buffer); 3239 rb_advance_reader(cpu_buffer);
3182 if (dolock) 3240 if (dolock)
@@ -3218,13 +3276,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3218/** 3276/**
3219 * ring_buffer_consume - return an event and consume it 3277 * ring_buffer_consume - return an event and consume it
3220 * @buffer: The ring buffer to get the next event from 3278 * @buffer: The ring buffer to get the next event from
3279 * @cpu: the cpu to read the buffer from
3280 * @ts: a variable to store the timestamp (may be NULL)
3281 * @lost_events: a variable to store if events were lost (may be NULL)
3221 * 3282 *
3222 * Returns the next event in the ring buffer, and that event is consumed. 3283 * Returns the next event in the ring buffer, and that event is consumed.
3223 * Meaning, that sequential reads will keep returning a different event, 3284 * Meaning, that sequential reads will keep returning a different event,
3224 * and eventually empty the ring buffer if the producer is slower. 3285 * and eventually empty the ring buffer if the producer is slower.
3225 */ 3286 */
3226struct ring_buffer_event * 3287struct ring_buffer_event *
3227ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 3288ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3289 unsigned long *lost_events)
3228{ 3290{
3229 struct ring_buffer_per_cpu *cpu_buffer; 3291 struct ring_buffer_per_cpu *cpu_buffer;
3230 struct ring_buffer_event *event = NULL; 3292 struct ring_buffer_event *event = NULL;
@@ -3245,9 +3307,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3245 if (dolock) 3307 if (dolock)
3246 spin_lock(&cpu_buffer->reader_lock); 3308 spin_lock(&cpu_buffer->reader_lock);
3247 3309
3248 event = rb_buffer_peek(cpu_buffer, ts); 3310 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3249 if (event) 3311 if (event) {
3312 cpu_buffer->lost_events = 0;
3250 rb_advance_reader(cpu_buffer); 3313 rb_advance_reader(cpu_buffer);
3314 }
3251 3315
3252 if (dolock) 3316 if (dolock)
3253 spin_unlock(&cpu_buffer->reader_lock); 3317 spin_unlock(&cpu_buffer->reader_lock);
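
A sketch of how a consumer would use the extended ring_buffer_consume() signature; example_drain_cpu is hypothetical and assumes the caller supplies the buffer and does something useful with each event:

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/ring_buffer.h>

static void example_drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost_events))) {
		if (lost_events)
			printk(KERN_INFO "dropped %lu events before this one\n",
			       lost_events);
		/* handle ring_buffer_event_data(event) and ts here */
	}
}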
@@ -3264,23 +3328,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3264EXPORT_SYMBOL_GPL(ring_buffer_consume); 3328EXPORT_SYMBOL_GPL(ring_buffer_consume);
3265 3329
3266/** 3330/**
3267 * ring_buffer_read_start - start a non consuming read of the buffer 3331 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
3268 * @buffer: The ring buffer to read from 3332 * @buffer: The ring buffer to read from
3269 * @cpu: The cpu buffer to iterate over 3333 * @cpu: The cpu buffer to iterate over
3270 * 3334 *
3271 * This starts up an iteration through the buffer. It also disables 3335 * This performs the initial preparations necessary to iterate
3272 * the recording to the buffer until the reading is finished. 3336 * through the buffer. Memory is allocated, buffer recording
3273 * This prevents the reading from being corrupted. This is not 3337 * is disabled, and the iterator pointer is returned to the caller.
3274 * a consuming read, so a producer is not expected.
3275 * 3338 *
3276 * Must be paired with ring_buffer_finish. 3339 * Disabling buffer recording prevents the reading from being
3340 * corrupted. This is not a consuming read, so a producer is not
3341 * expected.
3342 *
3343 * After a sequence of ring_buffer_read_prepare calls, the user is
3344 * expected to make at least one call to ring_buffer_read_prepare_sync.
3345 * Afterwards, ring_buffer_read_start is invoked to get things going
3346 * for real.
3347 *
3348 * This overall must be paired with ring_buffer_finish.
3277 */ 3349 */
3278struct ring_buffer_iter * 3350struct ring_buffer_iter *
3279ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 3351ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3280{ 3352{
3281 struct ring_buffer_per_cpu *cpu_buffer; 3353 struct ring_buffer_per_cpu *cpu_buffer;
3282 struct ring_buffer_iter *iter; 3354 struct ring_buffer_iter *iter;
3283 unsigned long flags;
3284 3355
3285 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3356 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3286 return NULL; 3357 return NULL;
@@ -3294,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3294 iter->cpu_buffer = cpu_buffer; 3365 iter->cpu_buffer = cpu_buffer;
3295 3366
3296 atomic_inc(&cpu_buffer->record_disabled); 3367 atomic_inc(&cpu_buffer->record_disabled);
3368
3369 return iter;
3370}
3371EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3372
3373/**
3374 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
3375 *
3376 * All previously invoked ring_buffer_read_prepare calls to prepare
3377 * iterators will be synchronized. Afterwards, ring_buffer_read_start
3378 * calls on those iterators are allowed.
3379 */
3380void
3381ring_buffer_read_prepare_sync(void)
3382{
3297 synchronize_sched(); 3383 synchronize_sched();
3384}
3385EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3386
3387/**
3388 * ring_buffer_read_start - start a non consuming read of the buffer
3389 * @iter: The iterator returned by ring_buffer_read_prepare
3390 *
3391 * This finalizes the startup of an iteration through the buffer.
3392 * The iterator comes from a call to ring_buffer_read_prepare and
3393 * an intervening ring_buffer_read_prepare_sync must have been
3394 * performed.
3395 *
3396 * Must be paired with ring_buffer_finish.
3397 */
3398void
3399ring_buffer_read_start(struct ring_buffer_iter *iter)
3400{
3401 struct ring_buffer_per_cpu *cpu_buffer;
3402 unsigned long flags;
3403
3404 if (!iter)
3405 return;
3406
3407 cpu_buffer = iter->cpu_buffer;
3298 3408
3299 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3409 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3300 arch_spin_lock(&cpu_buffer->lock); 3410 arch_spin_lock(&cpu_buffer->lock);
3301 rb_iter_reset(iter); 3411 rb_iter_reset(iter);
3302 arch_spin_unlock(&cpu_buffer->lock); 3412 arch_spin_unlock(&cpu_buffer->lock);
3303 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3413 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3304
3305 return iter;
3306} 3414}
3307EXPORT_SYMBOL_GPL(ring_buffer_read_start); 3415EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3308 3416
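
The reason for splitting the old ring_buffer_read_start() into prepare/sync/start is that a caller opening one iterator per CPU now pays for a single synchronize_sched() instead of one grace period per CPU. A sketch of the intended calling sequence (example_start_iters is hypothetical; error handling omitted):

#include <linux/ring_buffer.h>

static void example_start_iters(struct ring_buffer *buffer,
				struct ring_buffer_iter **iter, int nr_cpus)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)	/* allocate + disable recording */
		iter[cpu] = ring_buffer_read_prepare(buffer, cpu);

	ring_buffer_read_prepare_sync();	/* one grace period covers all CPUs */

	for (cpu = 0; cpu < nr_cpus; cpu++)	/* reset each iterator for real */
		ring_buffer_read_start(iter[cpu]);
}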
@@ -3396,6 +3504,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3396 cpu_buffer->write_stamp = 0; 3504 cpu_buffer->write_stamp = 0;
3397 cpu_buffer->read_stamp = 0; 3505 cpu_buffer->read_stamp = 0;
3398 3506
3507 cpu_buffer->lost_events = 0;
3508 cpu_buffer->last_overrun = 0;
3509
3399 rb_head_page_activate(cpu_buffer); 3510 rb_head_page_activate(cpu_buffer);
3400} 3511}
3401 3512
@@ -3671,6 +3782,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3671 struct ring_buffer_event *event; 3782 struct ring_buffer_event *event;
3672 struct buffer_data_page *bpage; 3783 struct buffer_data_page *bpage;
3673 struct buffer_page *reader; 3784 struct buffer_page *reader;
3785 unsigned long missed_events;
3674 unsigned long flags; 3786 unsigned long flags;
3675 unsigned int commit; 3787 unsigned int commit;
3676 unsigned int read; 3788 unsigned int read;
@@ -3707,6 +3819,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3707 read = reader->read; 3819 read = reader->read;
3708 commit = rb_page_commit(reader); 3820 commit = rb_page_commit(reader);
3709 3821
3822 /* Check if any events were dropped */
3823 missed_events = cpu_buffer->lost_events;
3824
3710 /* 3825 /*
3711 * If this page has been partially read or 3826 * If this page has been partially read or
3712 * if len is not big enough to read the rest of the page or 3827 * if len is not big enough to read the rest of the page or
@@ -3767,9 +3882,35 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3767 local_set(&reader->entries, 0); 3882 local_set(&reader->entries, 0);
3768 reader->read = 0; 3883 reader->read = 0;
3769 *data_page = bpage; 3884 *data_page = bpage;
3885
3886 /*
3887 * Use the real_end for the data size,
3888 * This gives us a chance to store the lost events
3889 * on the page.
3890 */
3891 if (reader->real_end)
3892 local_set(&bpage->commit, reader->real_end);
3770 } 3893 }
3771 ret = read; 3894 ret = read;
3772 3895
3896 cpu_buffer->lost_events = 0;
3897 /*
3898 * Set a flag in the commit field if we lost events
3899 */
3900 if (missed_events) {
3901 commit = local_read(&bpage->commit);
3902
3903 /* If there is room at the end of the page to save the
3904 * missed events, then record it there.
3905 */
3906 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
3907 memcpy(&bpage->data[commit], &missed_events,
3908 sizeof(missed_events));
3909 local_add(RB_MISSED_STORED, &bpage->commit);
3910 }
3911 local_add(RB_MISSED_EVENTS, &bpage->commit);
3912 }
3913
3773 out_unlock: 3914 out_unlock:
3774 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3915 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3775 3916
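
A sketch of a kernel-side caller of ring_buffer_read_page() that would observe these flags; example_read_one_page is hypothetical, the alloc/read/free signatures are the ones this kernel version is understood to export, and a real consumer (such as trace.c's splice path) hands the filled page to user space, where a decode like the sketch shown earlier applies:

#include <linux/mm.h>
#include <linux/ring_buffer.h>

static void example_read_one_page(struct ring_buffer *buffer, int cpu)
{
	void *page = ring_buffer_alloc_read_page(buffer);
	int ret;

	if (!page)
		return;

	/* full = 1: only succeed when a whole page worth of data can be
	 * consumed at once */
	ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 1);
	if (ret >= 0) {
		/* The page now begins with the buffer_data_page header;
		 * its commit word may carry RB_MISSED_EVENTS and, if the
		 * count fit on the page, RB_MISSED_STORED as set just
		 * above. */
	}

	ring_buffer_free_read_page(buffer, page);
}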