Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--   kernel/trace/ring_buffer.c | 219
1 file changed, 180 insertions(+), 39 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8c1b2d290718..7f6059c5aa94 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 
+#include <asm/local.h>
 #include "trace.h"
 
 /*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
 
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT	0
+# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT	1
+# define RB_ARCH_ALIGNMENT		8U
+#endif
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -309,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/* Flag when events were overwritten */
+#define RB_MISSED_EVENTS	(1 << 31)
+/* Missed count stored at end */
+#define RB_MISSED_STORED	(1 << 30)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -328,6 +343,7 @@ struct buffer_page {
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
 	local_t		 entries;	/* entries on this page */
+	unsigned long	 real_end;	/* real end of data */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -407,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 			       (unsigned int)sizeof(field.commit),
 			       (unsigned int)is_signed_type(long));
 
+	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       1,
+			       (unsigned int)is_signed_type(long));
+
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
 			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
@@ -430,6 +452,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			lost_events;
+	unsigned long			last_overrun;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
@@ -1200,18 +1224,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			return;
+			goto out;
 		p = cpu_buffer->pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
 	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		return;
+		goto out;
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+ out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1228,7 +1253,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			return;
+			goto out;
 		p = pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
@@ -1237,6 +1262,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+ out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1546,7 +1572,7 @@ rb_update_event(struct ring_buffer_event *event,
 
 	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA)
+		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 			event->array[0] = length;
 		else
 			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1721,11 +1747,11 @@ static unsigned rb_calculate_event_length(unsigned length)
 	if (!length)
 		length = 1;
 
-	if (length > RB_MAX_SMALL_DATA)
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 		length += sizeof(event.array[0]);
 
 	length += RB_EVNT_HDR_SIZE;
-	length = ALIGN(length, RB_ALIGNMENT);
+	length = ALIGN(length, RB_ARCH_ALIGNMENT);
 
 	return length;
 }
@@ -1750,6 +1776,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	kmemcheck_annotate_bitfield(event, bitfield);
 
 	/*
+	 * Save the original length to the meta data.
+	 * This will be used by the reader to add lost event
+	 * counter.
+	 */
+	tail_page->real_end = tail;
+
+	/*
 	 * If this event is bigger than the minimum size, then
 	 * we need to be careful that we don't subtract the
 	 * write counter enough to allow another writer to slip
@@ -1967,17 +2000,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
 {
 	struct ring_buffer_event *event;
-	static int once;
 	int ret;
 
-	if (unlikely(*delta > (1ULL << 59) && !once++)) {
-		printk(KERN_WARNING "Delta way too big! %llu"
-		       " ts=%llu write stamp = %llu\n",
-		       (unsigned long long)*delta,
-		       (unsigned long long)*ts,
-		       (unsigned long long)cpu_buffer->write_stamp);
-		WARN_ON(1);
-	}
+	WARN_ONCE(*delta > (1ULL << 59),
+		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+		  (unsigned long long)*delta,
+		  (unsigned long long)*ts,
+		  (unsigned long long)cpu_buffer->write_stamp);
 
 	/*
 	 * The delta is too big, we to add a
@@ -2232,12 +2261,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
-	if (atomic_read(&buffer->record_disabled))
-		return NULL;
-
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out_nocheck;
+
 	if (trace_recursive_lock())
 		goto out_nocheck;
 
@@ -2469,11 +2498,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	if (atomic_read(&buffer->record_disabled))
-		return -EBUSY;
-
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2541,7 +2570,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
  * @buffer: The ring buffer to enable writes
  *
  * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
  */
 void ring_buffer_record_enable(struct ring_buffer *buffer)
 {
@@ -2577,7 +2606,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
  * @cpu: The CPU to enable.
  *
  * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
 */
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
@@ -2826,6 +2855,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
 	int ret;
@@ -2867,6 +2897,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->write, 0);
 	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
+	cpu_buffer->reader_page->real_end = 0;
 
  spin:
 	/*
@@ -2887,6 +2918,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
 
 	/*
+	 * We want to make sure we read the overruns after we set up our
+	 * pointers to the next object. The writer side does a
+	 * cmpxchg to cross pages which acts as the mb on the writer
+	 * side. Note, the reader will constantly fail the swap
+	 * while the writer is updating the pointers, so this
+	 * guarantees that the overwrite recorded here is the one we
+	 * want to compare with the last_overrun.
+	 */
+	smp_mb();
+	overwrite = local_read(&(cpu_buffer->overrun));
+
+	/*
 	 * Here's the tricky part.
 	 *
 	 * We need to move the pointer past the header page.
@@ -2917,6 +2960,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page = reader;
 	rb_reset_reader_page(cpu_buffer);
 
+	if (overwrite != cpu_buffer->last_overrun) {
+		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
+		cpu_buffer->last_overrun = overwrite;
+	}
+
 	goto again;
 
  out:
@@ -2993,8 +3041,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		rb_advance_iter(iter);
 }
 
+static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->lost_events;
+}
+
 static struct ring_buffer_event *
-rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
+	       unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
@@ -3046,6 +3100,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
 		}
+		if (lost_events)
+			*lost_events = rb_lost_events(cpu_buffer);
 		return event;
 
 	default:
@@ -3156,12 +3212,14 @@ static inline int rb_ok_to_lock(void)
  * @buffer: The ring buffer to read
  * @cpu: The cpu to peak at
  * @ts: The timestamp counter of this event.
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
@@ -3176,7 +3234,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	local_irq_save(flags);
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
-	event = rb_buffer_peek(cpu_buffer, ts);
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
 	if (dolock)
@@ -3218,13 +3276,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 /**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
+ * @cpu: the cpu to read the buffer from
+ * @ts: a variable to store the timestamp (may be NULL)
+ * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
@@ -3245,9 +3307,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 
-	event = rb_buffer_peek(cpu_buffer, ts);
-	if (event)
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
+	if (event) {
+		cpu_buffer->lost_events = 0;
 		rb_advance_reader(cpu_buffer);
+	}
 
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
@@ -3264,23 +3328,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
- * ring_buffer_read_start - start a non consuming read of the buffer
+ * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 *
- * This starts up an iteration through the buffer. It also disables
- * the recording to the buffer until the reading is finished.
- * This prevents the reading from being corrupted. This is not
- * a consuming read, so a producer is not expected.
+ * This performs the initial preparations necessary to iterate
+ * through the buffer. Memory is allocated, buffer recording
+ * is disabled, and the iterator pointer is returned to the caller.
 *
- * Must be paired with ring_buffer_finish.
+ * Disabling buffer recordng prevents the reading from being
+ * corrupted. This is not a consuming read, so a producer is not
+ * expected.
+ *
+ * After a sequence of ring_buffer_read_prepare calls, the user is
+ * expected to make at least one call to ring_buffer_prepare_sync.
+ * Afterwards, ring_buffer_read_start is invoked to get things going
+ * for real.
+ *
+ * This overall must be paired with ring_buffer_finish.
 */
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
@@ -3294,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	iter->cpu_buffer = cpu_buffer;
 
 	atomic_inc(&cpu_buffer->record_disabled);
+
+	return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
+
+/**
+ * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
+ *
+ * All previously invoked ring_buffer_read_prepare calls to prepare
+ * iterators will be synchronized. Afterwards, read_buffer_read_start
+ * calls on those iterators are allowed.
+ */
+void
+ring_buffer_read_prepare_sync(void)
+{
 	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @iter: The iterator returned by ring_buffer_read_prepare
+ *
+ * This finalizes the startup of an iteration through the buffer.
+ * The iterator comes from a call to ring_buffer_read_prepare and
+ * an intervening ring_buffer_read_prepare_sync must have been
+ * performed.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+void
+ring_buffer_read_start(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!iter)
+		return;
+
+	cpu_buffer = iter->cpu_buffer;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-	return iter;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
@@ -3396,6 +3504,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
 
+	cpu_buffer->lost_events = 0;
+	cpu_buffer->last_overrun = 0;
+
 	rb_head_page_activate(cpu_buffer);
 }
 
@@ -3671,6 +3782,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
 	struct buffer_page *reader;
+	unsigned long missed_events;
 	unsigned long flags;
 	unsigned int commit;
 	unsigned int read;
@@ -3707,6 +3819,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	read = reader->read;
 	commit = rb_page_commit(reader);
 
+	/* Check if any events were dropped */
+	missed_events = cpu_buffer->lost_events;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -3767,9 +3882,35 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
+
+		/*
+		 * Use the real_end for the data size,
+		 * This gives us a chance to store the lost events
+		 * on the page.
+		 */
+		if (reader->real_end)
+			local_set(&bpage->commit, reader->real_end);
 	}
 	ret = read;
 
+	cpu_buffer->lost_events = 0;
+	/*
+	 * Set a flag in the commit field if we lost events
+	 */
+	if (missed_events) {
+		commit = local_read(&bpage->commit);
+
+		/* If there is room at the end of the page to save the
+		 * missed events, then record it there.
+		 */
+		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
+			memcpy(&bpage->data[commit], &missed_events,
+			       sizeof(missed_events));
+			local_add(RB_MISSED_STORED, &bpage->commit);
+		}
+		local_add(RB_MISSED_EVENTS, &bpage->commit);
+	}
+
  out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 