path: root/kernel/trace/ring_buffer.c
author	Michal Marek <mmarek@suse.cz>	2010-08-04 07:59:13 -0400
committer	Michal Marek <mmarek@suse.cz>	2010-08-04 07:59:13 -0400
commit	772320e84588dcbe1600ffb83e5f328f2209ac2a (patch)
tree	a7de21b79340aeaa17c58126f6b801b82c77b53a /kernel/trace/ring_buffer.c
parent	1ce53adf13a54375d2a5c7cdbe341b2558389615 (diff)
parent	9fe6206f400646a2322096b56c59891d530e8d51 (diff)
Merge commit 'v2.6.35' into kbuild/kbuild
Conflicts:
	arch/powerpc/Makefile
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--	kernel/trace/ring_buffer.c	258
1 file changed, 216 insertions(+), 42 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..1da7b6ea8b85 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 
+#include <asm/local.h>
 #include "trace.h"
 
 /*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT	0
+# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT	1
+# define RB_ARCH_ALIGNMENT		8U
+#endif
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
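Illustrative sketch, not part of the patch: with RB_FORCE_8BYTE_ALIGNMENT set, every data event takes the extended-header path and the reserved size is rounded up to RB_ARCH_ALIGNMENT (8 bytes) rather than 4. The header size (4 bytes) and the type_len maximum (28) are assumed values for the arithmetic; the helper paraphrases rb_calculate_event_length() as changed further down in this diff.

#include <stdio.h>

/* Constants mirrored from the hunk above; header size (4) and the
 * type_len maximum (28) are assumed values for this sketch. */
#define RB_ALIGNMENT		4U
#define RB_EVNT_HDR_SIZE	4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * 28)

#define RB_FORCE_8BYTE_ALIGNMENT 1	/* pretend 64-bit, no efficient unaligned access */
#define RB_ARCH_ALIGNMENT	 8U

#define ALIGN(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))

/* Paraphrase of rb_calculate_event_length() as changed later in this diff. */
static unsigned int event_length(unsigned int length)
{
	if (!length)
		length = 1;

	/* Forced 8-byte alignment always takes the extended-header path. */
	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
		length += sizeof(unsigned int);	/* event.array[0] */

	length += RB_EVNT_HDR_SIZE;
	return ALIGN(length, RB_ARCH_ALIGNMENT);
}

int main(void)
{
	/* 6-byte payload: 6 + 4 (array[0]) + 4 (header) = 14, rounded up to 16. */
	printf("reserved for a 6 byte payload: %u\n", event_length(6));
	return 0;
}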
@@ -309,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/* Flag when events were overwritten */
+#define RB_MISSED_EVENTS	(1 << 31)
+/* Missed count stored at end */
+#define RB_MISSED_STORED	(1 << 30)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -328,6 +343,7 @@ struct buffer_page {
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
 	local_t		 entries;	/* entries on this page */
+	unsigned long	 real_end;	/* real end of data */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -407,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 			       (unsigned int)sizeof(field.commit),
 			       (unsigned int)is_signed_type(long));
 
+	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       1,
+			       (unsigned int)is_signed_type(long));
+
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
 			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
@@ -430,6 +452,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			lost_events;
+	unsigned long			last_overrun;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
@@ -464,6 +488,8 @@ struct ring_buffer_iter {
 	struct ring_buffer_per_cpu	*cpu_buffer;
 	unsigned long			head;
 	struct buffer_page		*head_page;
+	struct buffer_page		*cache_reader_page;
+	unsigned long			cache_read;
 	u64				read_stamp;
 };
 
@@ -1198,18 +1224,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			return;
+			goto out;
 		p = cpu_buffer->pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
 	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		return;
+		goto out;
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1226,7 +1253,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			return;
+			goto out;
 		p = pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
@@ -1235,6 +1262,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1544,7 +1572,7 @@ rb_update_event(struct ring_buffer_event *event,
 
 	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA)
+		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 			event->array[0] = length;
 		else
 			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1719,11 +1747,11 @@ static unsigned rb_calculate_event_length(unsigned length)
 	if (!length)
 		length = 1;
 
-	if (length > RB_MAX_SMALL_DATA)
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 		length += sizeof(event.array[0]);
 
 	length += RB_EVNT_HDR_SIZE;
-	length = ALIGN(length, RB_ALIGNMENT);
+	length = ALIGN(length, RB_ARCH_ALIGNMENT);
 
 	return length;
 }
@@ -1740,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 * must fill the old tail_page with padding.
 	 */
 	if (tail >= BUF_PAGE_SIZE) {
+		/*
+		 * If the page was filled, then we still need
+		 * to update the real_end. Reset it to zero
+		 * and the reader will ignore it.
+		 */
+		if (tail == BUF_PAGE_SIZE)
+			tail_page->real_end = 0;
+
 		local_sub(length, &tail_page->write);
 		return;
 	}
@@ -1748,6 +1784,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	kmemcheck_annotate_bitfield(event, bitfield);
 
 	/*
+	 * Save the original length to the meta data.
+	 * This will be used by the reader to add lost event
+	 * counter.
+	 */
+	tail_page->real_end = tail;
+
+	/*
 	 * If this event is bigger than the minimum size, then
 	 * we need to be careful that we don't subtract the
 	 * write counter enough to allow another writer to slip
@@ -1965,17 +2008,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
 {
 	struct ring_buffer_event *event;
-	static int once;
 	int ret;
 
-	if (unlikely(*delta > (1ULL << 59) && !once++)) {
-		printk(KERN_WARNING "Delta way too big! %llu"
-		       " ts=%llu write stamp = %llu\n",
-		       (unsigned long long)*delta,
-		       (unsigned long long)*ts,
-		       (unsigned long long)cpu_buffer->write_stamp);
-		WARN_ON(1);
-	}
+	WARN_ONCE(*delta > (1ULL << 59),
+		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+		  (unsigned long long)*delta,
+		  (unsigned long long)*ts,
+		  (unsigned long long)cpu_buffer->write_stamp);
 
 	/*
 	 * The delta is too big, we to add a
@@ -2230,12 +2269,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
-	if (atomic_read(&buffer->record_disabled))
-		return NULL;
-
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out_nocheck;
+
 	if (trace_recursive_lock())
 		goto out_nocheck;
 
@@ -2467,11 +2506,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	if (atomic_read(&buffer->record_disabled))
-		return -EBUSY;
-
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2539,7 +2578,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
  * @buffer: The ring buffer to enable writes
  *
  * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
  */
 void ring_buffer_record_enable(struct ring_buffer *buffer)
 {
@@ -2575,7 +2614,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
  * @cpu: The CPU to enable.
  *
  * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
 */
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
@@ -2716,6 +2755,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 		iter->read_stamp = cpu_buffer->read_stamp;
 	else
 		iter->read_stamp = iter->head_page->page->time_stamp;
+	iter->cache_reader_page = cpu_buffer->reader_page;
+	iter->cache_read = cpu_buffer->read;
 }
 
 /**
@@ -2822,6 +2863,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
 	int ret;
@@ -2863,6 +2905,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->write, 0);
 	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
+	cpu_buffer->reader_page->real_end = 0;
 
  spin:
 	/*
@@ -2883,6 +2926,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
 
 	/*
+	 * We want to make sure we read the overruns after we set up our
+	 * pointers to the next object. The writer side does a
+	 * cmpxchg to cross pages which acts as the mb on the writer
+	 * side. Note, the reader will constantly fail the swap
+	 * while the writer is updating the pointers, so this
+	 * guarantees that the overwrite recorded here is the one we
+	 * want to compare with the last_overrun.
+	 */
+	smp_mb();
+	overwrite = local_read(&(cpu_buffer->overrun));
+
+	/*
 	 * Here's the tricky part.
 	 *
 	 * We need to move the pointer past the header page.
@@ -2913,6 +2968,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page = reader;
 	rb_reset_reader_page(cpu_buffer);
 
+	if (overwrite != cpu_buffer->last_overrun) {
+		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
+		cpu_buffer->last_overrun = overwrite;
+	}
+
 	goto again;
 
  out:
@@ -2989,8 +3049,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		rb_advance_iter(iter);
 }
 
+static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->lost_events;
+}
+
 static struct ring_buffer_event *
-rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
+	       unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
@@ -3042,6 +3108,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
 		}
+		if (lost_events)
+			*lost_events = rb_lost_events(cpu_buffer);
 		return event;
 
 	default:
@@ -3060,13 +3128,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	int nr_loops = 0;
 
-	if (ring_buffer_iter_empty(iter))
-		return NULL;
-
 	cpu_buffer = iter->cpu_buffer;
 	buffer = cpu_buffer->buffer;
 
+	/*
+	 * Check if someone performed a consuming read to
+	 * the buffer. A consuming read invalidates the iterator
+	 * and we need to reset the iterator in this case.
+	 */
+	if (unlikely(iter->cache_read != cpu_buffer->read ||
+		     iter->cache_reader_page != cpu_buffer->reader_page))
+		rb_iter_reset(iter);
+
  again:
+	if (ring_buffer_iter_empty(iter))
+		return NULL;
+
 	/*
 	 * We repeat when a timestamp is encountered.
 	 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3158,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
 
+	if (iter->head >= local_read(&iter->head_page->page->commit)) {
+		rb_inc_iter(iter);
+		goto again;
+	}
+
 	event = rb_iter_head_event(iter);
 
 	switch (event->type_len) {
@@ -3138,12 +3220,14 @@ static inline int rb_ok_to_lock(void)
  * @buffer: The ring buffer to read
  * @cpu: The cpu to peak at
  * @ts: The timestamp counter of this event.
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * This will return the event that will be read next, but does
  * not consume the data.
 */
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
@@ -3158,7 +3242,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	local_irq_save(flags);
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
-	event = rb_buffer_peek(cpu_buffer, ts);
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
 	if (dolock)
@@ -3200,13 +3284,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 /**
  * ring_buffer_consume - return an event and consume it
  * @buffer: The ring buffer to get the next event from
+ * @cpu: the cpu to read the buffer from
+ * @ts: a variable to store the timestamp (may be NULL)
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * Returns the next event in the ring buffer, and that event is consumed.
  * Meaning, that sequential reads will keep returning a different event,
  * and eventually empty the ring buffer if the producer is slower.
  */
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
@@ -3227,9 +3315,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 
-	event = rb_buffer_peek(cpu_buffer, ts);
-	if (event)
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
+	if (event) {
+		cpu_buffer->lost_events = 0;
 		rb_advance_reader(cpu_buffer);
+	}
 
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
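The hunks above thread a new lost_events out-parameter through rb_buffer_peek(), ring_buffer_peek() and ring_buffer_consume(); passing NULL keeps the old behaviour. A minimal sketch of a consuming reader using the new signature (assumed to run in kernel context; error handling and the surrounding tracer plumbing are omitted):

#include <linux/kernel.h>
#include <linux/ring_buffer.h>

/* Drain one CPU's buffer with the post-patch signature; a non-zero
 * lost_events tells us how many events were overwritten before the
 * one we just read. */
static void drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost_events))) {
		if (lost_events)
			pr_info("cpu%d: %lu events lost before ts %llu\n",
				cpu, lost_events, (unsigned long long)ts);
		/* ring_buffer_event_data(event) points at the payload. */
	}
}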
@@ -3246,23 +3336,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
- * ring_buffer_read_start - start a non consuming read of the buffer
+ * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
  *
- * This starts up an iteration through the buffer. It also disables
- * the recording to the buffer until the reading is finished.
- * This prevents the reading from being corrupted. This is not
- * a consuming read, so a producer is not expected.
+ * This performs the initial preparations necessary to iterate
+ * through the buffer. Memory is allocated, buffer recording
+ * is disabled, and the iterator pointer is returned to the caller.
  *
- * Must be paired with ring_buffer_finish.
+ * Disabling buffer recordng prevents the reading from being
+ * corrupted. This is not a consuming read, so a producer is not
+ * expected.
+ *
+ * After a sequence of ring_buffer_read_prepare calls, the user is
+ * expected to make at least one call to ring_buffer_prepare_sync.
+ * Afterwards, ring_buffer_read_start is invoked to get things going
+ * for real.
+ *
+ * This overall must be paired with ring_buffer_finish.
  */
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
@@ -3276,15 +3373,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	iter->cpu_buffer = cpu_buffer;
 
 	atomic_inc(&cpu_buffer->record_disabled);
+
+	return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
+
+/**
+ * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
+ *
+ * All previously invoked ring_buffer_read_prepare calls to prepare
+ * iterators will be synchronized. Afterwards, read_buffer_read_start
+ * calls on those iterators are allowed.
+ */
+void
+ring_buffer_read_prepare_sync(void)
+{
 	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @iter: The iterator returned by ring_buffer_read_prepare
+ *
+ * This finalizes the startup of an iteration through the buffer.
+ * The iterator comes from a call to ring_buffer_read_prepare and
+ * an intervening ring_buffer_read_prepare_sync must have been
+ * performed.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+void
+ring_buffer_read_start(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!iter)
+		return;
+
+	cpu_buffer = iter->cpu_buffer;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-	return iter;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
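The kerneldoc above splits iterator startup into three steps: ring_buffer_read_prepare() per CPU, one ring_buffer_read_prepare_sync() for the whole set, then ring_buffer_read_start() on each iterator. A sketch of that sequence for two CPUs (hypothetical helper, CPU count hard-coded for brevity; tear-down is only hinted at):

#include <linux/ring_buffer.h>

/* Hypothetical helper: open non-consuming iterators on CPUs 0 and 1
 * using the prepare/sync/start split described above. */
static void start_two_iters(struct ring_buffer *buffer,
			    struct ring_buffer_iter **iters)
{
	int cpu;

	/* Step 1: allocate each iterator and disable recording on its CPU. */
	for (cpu = 0; cpu < 2; cpu++)
		iters[cpu] = ring_buffer_read_prepare(buffer, cpu);

	/* Step 2: one synchronization now covers every prepared iterator. */
	ring_buffer_read_prepare_sync();

	/* Step 3: reset and start each iterator (NULL iterators are ignored). */
	for (cpu = 0; cpu < 2; cpu++)
		ring_buffer_read_start(iters[cpu]);

	/* Read with ring_buffer_read()/ring_buffer_iter_peek(), then release
	 * each iterator with ring_buffer_read_finish(). */
}

The apparent point of the split is to pay the synchronize_sched() cost once for the whole set of CPUs rather than once per iterator.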
@@ -3378,6 +3512,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
 
+	cpu_buffer->lost_events = 0;
+	cpu_buffer->last_overrun = 0;
+
 	rb_head_page_activate(cpu_buffer);
 }
 
@@ -3653,6 +3790,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
 	struct buffer_page *reader;
+	unsigned long missed_events;
 	unsigned long flags;
 	unsigned int commit;
 	unsigned int read;
@@ -3689,6 +3827,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	read = reader->read;
 	commit = rb_page_commit(reader);
 
+	/* Check if any events were dropped */
+	missed_events = cpu_buffer->lost_events;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -3749,9 +3890,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
+
+		/*
+		 * Use the real_end for the data size,
+		 * This gives us a chance to store the lost events
+		 * on the page.
+		 */
+		if (reader->real_end)
+			local_set(&bpage->commit, reader->real_end);
 	}
 	ret = read;
 
+	cpu_buffer->lost_events = 0;
+
+	commit = local_read(&bpage->commit);
+	/*
+	 * Set a flag in the commit field if we lost events
+	 */
+	if (missed_events) {
+		/* If there is room at the end of the page to save the
+		 * missed events, then record it there.
+		 */
+		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
+			memcpy(&bpage->data[commit], &missed_events,
+			       sizeof(missed_events));
+			local_add(RB_MISSED_STORED, &bpage->commit);
+			commit += sizeof(missed_events);
+		}
+		local_add(RB_MISSED_EVENTS, &bpage->commit);
+	}
+
+	/*
+	 * This page may be off to user land. Zero it out here.
+	 */
+	if (commit < BUF_PAGE_SIZE)
+		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
+
  out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
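Together with the RB_MISSED_EVENTS/RB_MISSED_STORED bits defined earlier in this diff, the tail of ring_buffer_read_page() gives a consumer of a swapped-out page enough to recover the drop count: bit 31 of the commit word flags that events were missed, bit 30 flags that the count was appended right after the data. A hedged sketch of the consumer-side decode, assuming the page layout and word size match struct buffer_data_page on the traced kernel (u64 timestamp, long commit word, then data):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define RB_MISSED_EVENTS	(1UL << 31)	/* events were overwritten */
#define RB_MISSED_STORED	(1UL << 30)	/* missed count appended after data */

/* Assumed to mirror struct buffer_data_page on the traced kernel. */
struct data_page {
	uint64_t	time_stamp;
	unsigned long	commit;		/* flags in bits 30/31, data length below */
	unsigned char	data[];
};

static void decode(const struct data_page *page)
{
	unsigned long commit = page->commit;
	unsigned long len = commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);
	unsigned long missed = 0;

	/* The count is written at data[len] only when both bits are set. */
	if ((commit & RB_MISSED_EVENTS) && (commit & RB_MISSED_STORED))
		memcpy(&missed, page->data + len, sizeof(missed));

	printf("data bytes: %lu, events missed: %s, stored count: %lu\n",
	       len, (commit & RB_MISSED_EVENTS) ? "yes" : "no", missed);
}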