| field     | value                                                                 |
|-----------|-----------------------------------------------------------------------|
| author    | Michal Marek <mmarek@suse.cz>  2010-08-04 07:59:13 -0400              |
| committer | Michal Marek <mmarek@suse.cz>  2010-08-04 07:59:13 -0400              |
| commit    | 772320e84588dcbe1600ffb83e5f328f2209ac2a (patch)                      |
| tree      | a7de21b79340aeaa17c58126f6b801b82c77b53a /kernel/trace/ring_buffer.c  |
| parent    | 1ce53adf13a54375d2a5c7cdbe341b2558389615 (diff)                       |
| parent    | 9fe6206f400646a2322096b56c59891d530e8d51 (diff)                       |
Merge commit 'v2.6.35' into kbuild/kbuild
Conflicts:
	arch/powerpc/Makefile
Diffstat (limited to 'kernel/trace/ring_buffer.c')
| -rw-r--r-- | kernel/trace/ring_buffer.c | 258 | 
1 file changed, 216 insertions(+), 42 deletions(-)
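The bulk of the ring_buffer.c delta below extends the reader-side API so that callers can learn how many events were overwritten while they were reading. As a rough illustration of the new calling convention, here is a minimal sketch of a per-CPU consuming read loop; the surrounding setup and the payload handling are placeholders, and only the `ring_buffer_consume()` signature (with its new `lost_events` out-parameter) comes from the patch itself.

```c
/* Hypothetical consumer built against the post-merge API (sketch only). */
#include <linux/ring_buffer.h>

static void drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost = 0;
	u64 ts;

	/* New in this merge: the fourth argument reports dropped events. */
	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
		if (lost)
			pr_info("cpu%d: %lu events were overwritten\n",
				cpu, lost);
		/* ring_buffer_event_data()/length() interpret the payload. */
	}
}
```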
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..1da7b6ea8b85 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 
+#include <asm/local.h>
 #include "trace.h"
 
 /*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT	0
+# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT	1
+# define RB_ARCH_ALIGNMENT		8U
+#endif
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -309,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/* Flag when events were overwritten */
+#define RB_MISSED_EVENTS	(1 << 31)
+/* Missed count stored at end */
+#define RB_MISSED_STORED	(1 << 30)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -328,6 +343,7 @@ struct buffer_page {
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
 	local_t		 entries;	/* entries on this page */
+	unsigned long	 real_end;	/* real end of data */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -407,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 			       (unsigned int)sizeof(field.commit),
 			       (unsigned int)is_signed_type(long));
 
+	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       1,
+			       (unsigned int)is_signed_type(long));
+
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
 			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
@@ -430,6 +452,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			lost_events;
+	unsigned long			last_overrun;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
@@ -464,6 +488,8 @@ struct ring_buffer_iter {
 	struct ring_buffer_per_cpu	*cpu_buffer;
 	unsigned long			head;
 	struct buffer_page		*head_page;
+	struct buffer_page		*cache_reader_page;
+	unsigned long			cache_read;
 	u64				read_stamp;
 };
 
@@ -1198,18 +1224,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			return;
+			goto out;
 		p = cpu_buffer->pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
 	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		return;
+		goto out;
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1226,7 +1253,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			return;
+			goto out;
 		p = pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
@@ -1235,6 +1262,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1544,7 +1572,7 @@ rb_update_event(struct ring_buffer_event *event,
 
 	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA)
+		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 			event->array[0] = length;
 		else
 			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1719,11 +1747,11 @@ static unsigned rb_calculate_event_length(unsigned length)
 	if (!length)
 		length = 1;
 
-	if (length > RB_MAX_SMALL_DATA)
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 		length += sizeof(event.array[0]);
 
 	length += RB_EVNT_HDR_SIZE;
-	length = ALIGN(length, RB_ALIGNMENT);
+	length = ALIGN(length, RB_ARCH_ALIGNMENT);
 
 	return length;
 }
@@ -1740,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 * must fill the old tail_page with padding.
 	 */
 	if (tail >= BUF_PAGE_SIZE) {
+		/*
+		 * If the page was filled, then we still need
+		 * to update the real_end. Reset it to zero
+		 * and the reader will ignore it.
+		 */
+		if (tail == BUF_PAGE_SIZE)
+			tail_page->real_end = 0;
+
 		local_sub(length, &tail_page->write);
 		return;
 	}
@@ -1748,6 +1784,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	kmemcheck_annotate_bitfield(event, bitfield);
 
 	/*
+	 * Save the original length to the meta data.
+	 * This will be used by the reader to add lost event
+	 * counter.
+	 */
+	tail_page->real_end = tail;
+
+	/*
 	 * If this event is bigger than the minimum size, then
 	 * we need to be careful that we don't subtract the
 	 * write counter enough to allow another writer to slip
@@ -1965,17 +2008,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
 {
 	struct ring_buffer_event *event;
-	static int once;
 	int ret;
 
-	if (unlikely(*delta > (1ULL << 59) && !once++)) {
-		printk(KERN_WARNING "Delta way too big! %llu"
-		       " ts=%llu write stamp = %llu\n",
-		       (unsigned long long)*delta,
-		       (unsigned long long)*ts,
-		       (unsigned long long)cpu_buffer->write_stamp);
-		WARN_ON(1);
-	}
+	WARN_ONCE(*delta > (1ULL << 59),
+		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+		  (unsigned long long)*delta,
+		  (unsigned long long)*ts,
+		  (unsigned long long)cpu_buffer->write_stamp);
 
 	/*
 	 * The delta is too big, we to add a
@@ -2230,12 +2269,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
-	if (atomic_read(&buffer->record_disabled))
-		return NULL;
-
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out_nocheck;
+
 	if (trace_recursive_lock())
 		goto out_nocheck;
 
@@ -2467,11 +2506,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	if (atomic_read(&buffer->record_disabled))
-		return -EBUSY;
-
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2539,7 +2578,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
  * @buffer: The ring buffer to enable writes
  *
  * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
  */
 void ring_buffer_record_enable(struct ring_buffer *buffer)
 {
@@ -2575,7 +2614,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
  * @cpu: The CPU to enable.
  *
  * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
  */
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
@@ -2716,6 +2755,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 		iter->read_stamp = cpu_buffer->read_stamp;
 	else
 		iter->read_stamp = iter->head_page->page->time_stamp;
+	iter->cache_reader_page = cpu_buffer->reader_page;
+	iter->cache_read = cpu_buffer->read;
 }
 
 /**
@@ -2822,6 +2863,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
 	int ret;
@@ -2863,6 +2905,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->write, 0);
 	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
+	cpu_buffer->reader_page->real_end = 0;
 
  spin:
 	/*
@@ -2883,6 +2926,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
 
 	/*
+	 * We want to make sure we read the overruns after we set up our
+	 * pointers to the next object. The writer side does a
+	 * cmpxchg to cross pages which acts as the mb on the writer
+	 * side. Note, the reader will constantly fail the swap
+	 * while the writer is updating the pointers, so this
+	 * guarantees that the overwrite recorded here is the one we
+	 * want to compare with the last_overrun.
+	 */
+	smp_mb();
+	overwrite = local_read(&(cpu_buffer->overrun));
+
+	/*
 	 * Here's the tricky part.
 	 *
 	 * We need to move the pointer past the header page.
@@ -2913,6 +2968,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page = reader;
 	rb_reset_reader_page(cpu_buffer);
 
+	if (overwrite != cpu_buffer->last_overrun) {
+		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
+		cpu_buffer->last_overrun = overwrite;
+	}
+
 	goto again;
 
  out:
@@ -2989,8 +3049,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		rb_advance_iter(iter);
 }
 
+static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->lost_events;
+}
+
 static struct ring_buffer_event *
-rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
+	       unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
@@ -3042,6 +3108,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
 		}
+		if (lost_events)
+			*lost_events = rb_lost_events(cpu_buffer);
 		return event;
 
 	default:
@@ -3060,13 +3128,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	int nr_loops = 0;
 
-	if (ring_buffer_iter_empty(iter))
-		return NULL;
-
 	cpu_buffer = iter->cpu_buffer;
 	buffer = cpu_buffer->buffer;
 
+	/*
+	 * Check if someone performed a consuming read to
+	 * the buffer. A consuming read invalidates the iterator
+	 * and we need to reset the iterator in this case.
+	 */
+	if (unlikely(iter->cache_read != cpu_buffer->read ||
+		     iter->cache_reader_page != cpu_buffer->reader_page))
+		rb_iter_reset(iter);
+
  again:
+	if (ring_buffer_iter_empty(iter))
+		return NULL;
+
 	/*
 	 * We repeat when a timestamp is encountered.
 	 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3158,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
 
+	if (iter->head >= local_read(&iter->head_page->page->commit)) {
+		rb_inc_iter(iter);
+		goto again;
+	}
+
 	event = rb_iter_head_event(iter);
 
 	switch (event->type_len) {
@@ -3138,12 +3220,14 @@ static inline int rb_ok_to_lock(void)
  * @buffer: The ring buffer to read
  * @cpu: The cpu to peak at
  * @ts: The timestamp counter of this event.
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * This will return the event that will be read next, but does
 * not consume the data.
 */
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
@@ -3158,7 +3242,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	local_irq_save(flags);
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
-	event = rb_buffer_peek(cpu_buffer, ts);
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
 	if (dolock)
@@ -3200,13 +3284,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 /**
  * ring_buffer_consume - return an event and consume it
  * @buffer: The ring buffer to get the next event from
+ * @cpu: the cpu to read the buffer from
+ * @ts: a variable to store the timestamp (may be NULL)
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * Returns the next event in the ring buffer, and that event is consumed.
  * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
@@ -3227,9 +3315,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 
-	event = rb_buffer_peek(cpu_buffer, ts);
-	if (event)
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
+	if (event) {
+		cpu_buffer->lost_events = 0;
 		rb_advance_reader(cpu_buffer);
+	}
 
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
@@ -3246,23 +3336,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
- * ring_buffer_read_start - start a non consuming read of the buffer
+ * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
  *
- * This starts up an iteration through the buffer. It also disables
- * the recording to the buffer until the reading is finished.
- * This prevents the reading from being corrupted. This is not
- * a consuming read, so a producer is not expected.
+ * This performs the initial preparations necessary to iterate
+ * through the buffer. Memory is allocated, buffer recording
+ * is disabled, and the iterator pointer is returned to the caller.
  *
- * Must be paired with ring_buffer_finish.
+ * Disabling buffer recordng prevents the reading from being
+ * corrupted. This is not a consuming read, so a producer is not
+ * expected.
+ *
+ * After a sequence of ring_buffer_read_prepare calls, the user is
+ * expected to make at least one call to ring_buffer_prepare_sync.
+ * Afterwards, ring_buffer_read_start is invoked to get things going
+ * for real.
+ *
+ * This overall must be paired with ring_buffer_finish.
 */
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
@@ -3276,15 +3373,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	iter->cpu_buffer = cpu_buffer;
 
 	atomic_inc(&cpu_buffer->record_disabled);
+
+	return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
+
+/**
+ * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
+ *
+ * All previously invoked ring_buffer_read_prepare calls to prepare
+ * iterators will be synchronized. Afterwards, read_buffer_read_start
+ * calls on those iterators are allowed.
+ */
+void
+ring_buffer_read_prepare_sync(void)
+{
 	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @iter: The iterator returned by ring_buffer_read_prepare
+ *
+ * This finalizes the startup of an iteration through the buffer.
+ * The iterator comes from a call to ring_buffer_read_prepare and
+ * an intervening ring_buffer_read_prepare_sync must have been
+ * performed.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+void
+ring_buffer_read_start(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!iter)
+		return;
+
+	cpu_buffer = iter->cpu_buffer;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-	return iter;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
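The hunk above splits iterator startup into three stages so that several per-CPU iterators can share a single synchronize_sched() grace period. A hedged sketch of the intended calling sequence, based on the kernel-doc comments added above; the helper name, the error handling, and the ring_buffer_read_finish() pairing are assumptions, not part of this hunk:

```c
/* Sketch: start non-consuming iterators on every online CPU (hypothetical helper). */
#include <linux/cpumask.h>
#include <linux/ring_buffer.h>

static void start_iters(struct ring_buffer *buffer,
			struct ring_buffer_iter **iters)
{
	int cpu;

	/* Stage 1: allocate iterators and disable recording per CPU. */
	for_each_online_cpu(cpu)
		iters[cpu] = ring_buffer_read_prepare(buffer, cpu);

	/* Stage 2: one grace period covers all of the prepare calls. */
	ring_buffer_read_prepare_sync();

	/* Stage 3: reset each iterator; NULL iterators are tolerated. */
	for_each_online_cpu(cpu)
		ring_buffer_read_start(iters[cpu]);

	/*
	 * Each iterator is later torn down with ring_buffer_read_finish()
	 * (the "ring_buffer_finish" the comments refer to).
	 */
}
```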
@@ -3378,6 +3512,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
 
+	cpu_buffer->lost_events = 0;
+	cpu_buffer->last_overrun = 0;
+
 	rb_head_page_activate(cpu_buffer);
 }
 
@@ -3653,6 +3790,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
 	struct buffer_page *reader;
+	unsigned long missed_events;
 	unsigned long flags;
 	unsigned int commit;
 	unsigned int read;
@@ -3689,6 +3827,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	read = reader->read;
 	commit = rb_page_commit(reader);
 
+	/* Check if any events were dropped */
+	missed_events = cpu_buffer->lost_events;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -3749,9 +3890,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
+
+		/*
+		 * Use the real_end for the data size,
+		 * This gives us a chance to store the lost events
+		 * on the page.
+		 */
+		if (reader->real_end)
+			local_set(&bpage->commit, reader->real_end);
 	}
 	ret = read;
 
+	cpu_buffer->lost_events = 0;
+
+	commit = local_read(&bpage->commit);
+	/*
+	 * Set a flag in the commit field if we lost events
+	 */
+	if (missed_events) {
+		/* If there is room at the end of the page to save the
+		 * missed events, then record it there.
+		 */
+		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
+			memcpy(&bpage->data[commit], &missed_events,
+			       sizeof(missed_events));
+			local_add(RB_MISSED_STORED, &bpage->commit);
+			commit += sizeof(missed_events);
+		}
+		local_add(RB_MISSED_EVENTS, &bpage->commit);
+	}
+
+	/*
+	 * This page may be off to user land. Zero it out here.
+	 */
+	if (commit < BUF_PAGE_SIZE)
+		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
+
  out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
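This final hunk is what the new RB_MISSED_EVENTS/RB_MISSED_STORED bits are for: when ring_buffer_read_page() hands a page off (for example to user space via splice), the missed-event count can be folded into the page's commit field. A small sketch of how a consumer of such a page might decode that field; the helper name, the return convention, and the page-layout assumptions are illustrative, only the flag semantics come from the patch:

```c
/* Sketch: decode the commit word of a page exported by ring_buffer_read_page(). */
#include <limits.h>
#include <string.h>

#define RB_MISSED_EVENTS	(1 << 31)	/* events were overwritten */
#define RB_MISSED_STORED	(1 << 30)	/* missed count stored at end */

static unsigned long page_missed_events(const void *page_data, long commit)
{
	/* Low bits of commit are the data length; top bits are the flags. */
	long len = commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);
	unsigned long missed = 0;

	if (!(commit & RB_MISSED_EVENTS))
		return 0;			/* nothing was dropped */

	if (commit & RB_MISSED_STORED)		/* count follows the data, if it fit */
		memcpy(&missed, (const char *)page_data + len, sizeof(missed));
	else
		missed = ULONG_MAX;		/* dropped, but the count did not fit */

	return missed;
}
```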
