From a0f320f48799f67329fcb1b26ff0451c304e1dde Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 20 Sep 2009 16:31:16 +0530 Subject: includecheck fix: kernel/trace, ring_buffer.c fix the following 'make includecheck' warning: kernel/trace/ring_buffer.c: trace.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Cc: Steven Rostedt Cc: Ingo Molnar Cc: Sam Ravnborg LKML-Reference: <1247068617.4382.107.camel@ht.satnam> --- kernel/trace/ring_buffer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6eef38923b07..d4ff01970547 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -201,8 +201,6 @@ int tracing_is_on(void) } EXPORT_SYMBOL_GPL(tracing_is_on); -#include "trace.h" - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) -- cgit v1.2.2 From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:50 -0500 Subject: tracing/events: Add 'signed' field to format files The sign info used for filters in the kernel is also useful to applications that process the trace stream. Add it to the format files and make it available to userspace. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d4ff01970547..e43c928356ee 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } -- cgit v1.2.2 From 67b394f7f26d84edb7294cc6528ab7ca6daa2ad1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 23 Oct 2009 19:36:18 -0400 Subject: tracing: Fix comment typo and documentation example Trivial patch to fix a documentation example and to fix a comment. Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20091023233646.871719877@goodmis.org> Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d4ff01970547..217f6991184f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2681,7 +2681,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) EXPORT_SYMBOL_GPL(ring_buffer_entries); /** - * ring_buffer_overrun_cpu - get the number of overruns in buffer + * ring_buffer_overruns - get the number of overruns in buffer * @buffer: The ring buffer * * Returns the total number of overruns in the ring buffer -- cgit v1.2.2 From 6d3f1e12f46a2f9a1bb7e7aa433df8dd31ce5647 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 23 Oct 2009 19:36:19 -0400 Subject: tracing: Remove cpu arg from the rb_time_stamp() function The cpu argument is not used inside the rb_time_stamp() function. Plus fix a typo. Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20091023233647.118547500@goodmis.org> Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 217f6991184f..3ffa502fb243 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -483,7 +483,7 @@ struct ring_buffer_iter { /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 -static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) +static inline u64 rb_time_stamp(struct ring_buffer *buffer) { /* shift to debug/test normalization and TIME_EXTENTS */ return buffer->clock() << DEBUG_SHIFT; @@ -494,7 +494,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) u64 time; preempt_disable_notrace(); - time = rb_time_stamp(buffer, cpu); + time = rb_time_stamp(buffer); preempt_enable_no_resched_notrace(); return time; @@ -599,7 +599,7 @@ static struct list_head *rb_list_head(struct list_head *list) } /* - * rb_is_head_page - test if the give page is the head page + * rb_is_head_page - test if the given page is the head page * * Because the reader may move the head_page pointer, we can * not trust what the head page is (it may be pointing to @@ -1868,7 +1868,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * Nested commits always have zero deltas, so * just reread the time stamp */ - *ts = rb_time_stamp(buffer, cpu_buffer->cpu); + *ts = rb_time_stamp(buffer); next_page->page->time_stamp = *ts; } @@ -2111,7 +2111,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) goto out_fail; - ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); + ts = rb_time_stamp(cpu_buffer->buffer); /* * Only the first commit can update the timestamp. -- cgit v1.2.2 From f7112949f6a4cd6883d66c882d568c2197321de6 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Nov 2009 19:42:45 +0800 Subject: ring-buffer: Synchronize resizing buffer with reader lock We got a sudden panic when we reduced the size of the ringbuffer. We can reproduce the panic by the following steps: echo 1 > events/sched/enable cat trace_pipe > /dev/null & while ((1)) do echo 12000 > buffer_size_kb echo 512 > buffer_size_kb done (not more than 5 seconds, panic ...) Reported-by: KOSAKI Motohiro Signed-off-by: Lai Jiangshan LKML-Reference: <4AF01735.9060409@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3ffa502fb243..5dd017fea6f5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1193,6 +1193,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); + spin_lock_irq(&cpu_buffer->reader_lock); rb_head_page_deactivate(cpu_buffer); for (i = 0; i < nr_pages; i++) { @@ -1207,6 +1208,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) return; rb_reset_cpu(cpu_buffer); + spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); -- cgit v1.2.2 From 5a50e33cc916f6a81cb96f0f24f6a88c9ab78b79 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Nov 2009 08:43:01 -0500 Subject: ring-buffer: Move access to commit_page up into function used With the change of the way we process commits. Where a commit only happens at the outer most level, and that we don't need to worry about a commit ending after the rb_start_commit() has been called, the code use to grab the commit page before the tail page to prevent a possible race. But this race no longer exists with the rb_start_commit() rb_end_commit() interface. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3ffa502fb243..4b8293fa545e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1785,9 +1785,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, static struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long length, unsigned long tail, - struct buffer_page *commit_page, struct buffer_page *tail_page, u64 *ts) { + struct buffer_page *commit_page = cpu_buffer->commit_page; struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; @@ -1890,13 +1890,10 @@ static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, unsigned type, unsigned long length, u64 *ts) { - struct buffer_page *tail_page, *commit_page; + struct buffer_page *tail_page; struct ring_buffer_event *event; unsigned long tail, write; - commit_page = cpu_buffer->commit_page; - /* we just need to protect against interrupts */ - barrier(); tail_page = cpu_buffer->tail_page; write = local_add_return(length, &tail_page->write); @@ -1907,7 +1904,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (write > BUF_PAGE_SIZE) return rb_move_tail(cpu_buffer, length, tail, - commit_page, tail_page, ts); + tail_page, ts); /* We reserved something on the buffer */ -- cgit v1.2.2 From 184210154b9aa570099183f6c062ac4eb11190b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Dec 2009 22:54:27 -0500 Subject: ring-buffer: Use sync sched protection on ring buffer resizing There was a comment in the ring buffer code that says the calling layers should prevent tracing or reading of the ring buffer while resizing. I have discovered that the tracers do not honor this arrangement. This patch moves the disabling and synchronizing the ring buffer to a higher layer during resizing. This guarantees that no writes are occurring while the resize takes place. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1ca4956ab5e..0d64c51ab4df 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) struct list_head *p; unsigned i; - atomic_inc(&cpu_buffer->record_disabled); - synchronize_sched(); - spin_lock_irq(&cpu_buffer->reader_lock); rb_head_page_deactivate(cpu_buffer); @@ -1214,9 +1211,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); - - atomic_dec(&cpu_buffer->record_disabled); - } static void @@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, struct list_head *p; unsigned i; - atomic_inc(&cpu_buffer->record_disabled); - synchronize_sched(); - spin_lock_irq(&cpu_buffer->reader_lock); rb_head_page_deactivate(cpu_buffer); @@ -1245,8 +1236,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); - - atomic_dec(&cpu_buffer->record_disabled); } /** @@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, * @buffer: the buffer to resize. * @size: the new size. * - * The tracer is responsible for making sure that the buffer is - * not being used while changing the size. - * Note: We may be able to change the above requirement by using - * RCU synchronizations. - * * Minimum size is 2 * BUF_PAGE_SIZE. * * Returns -1 on failure. @@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) if (size == buffer_size) return size; + atomic_inc(&buffer->record_disabled); + + /* Make sure all writers are done with this buffer. */ + synchronize_sched(); + mutex_lock(&buffer->mutex); get_online_cpus(); @@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) put_online_cpus(); mutex_unlock(&buffer->mutex); + atomic_dec(&buffer->record_disabled); + return size; free_pages: @@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) } put_online_cpus(); mutex_unlock(&buffer->mutex); + atomic_dec(&buffer->record_disabled); return -ENOMEM; /* @@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) out_fail: put_online_cpus(); mutex_unlock(&buffer->mutex); + atomic_dec(&buffer->record_disabled); return -1; } EXPORT_SYMBOL_GPL(ring_buffer_resize); -- cgit v1.2.2 From dd7f59435782a02ceb6d16b9ce823dd3345d75ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Dec 2009 23:20:52 -0500 Subject: ring-buffer: Move resize integrity check under reader lock While using an application that does splice on the ftrace ring buffer at start up, I triggered an integrity check failure. Looking into this, I discovered that resizing the buffer performs an integrity check after the buffer is resized. This check unfortunately is preformed after it releases the reader lock. If a reader is reading the buffer it may cause the integrity check to trigger a false failure. This patch simply moves the integrity checker under the protection of the ring buffer reader lock. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0d64c51ab4df..eccb4cf1e998 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1208,9 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) return; rb_reset_cpu(cpu_buffer); - spin_unlock_irq(&cpu_buffer->reader_lock); - rb_check_pages(cpu_buffer); + + spin_unlock_irq(&cpu_buffer->reader_lock); } static void @@ -1233,9 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, list_add_tail(&bpage->list, cpu_buffer->pages); } rb_reset_cpu(cpu_buffer); - spin_unlock_irq(&cpu_buffer->reader_lock); - rb_check_pages(cpu_buffer); + + spin_unlock_irq(&cpu_buffer->reader_lock); } /** -- cgit v1.2.2 From 445c89514be242b1b0080056d50bdc1b72adeb5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Dec 2009 19:49:50 +0100 Subject: locking: Convert raw_spinlock to arch_spinlock The raw_spin* namespace was taken by lockdep for the architecture specific implementations. raw_spin_* would be the ideal name space for the spinlocks which are not converted to sleeping locks in preempt-rt. Linus suggested to convert the raw_ to arch_ locks and cleanup the name space instead of using an artifical name like core_spin, atomic_spin or whatever No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1ca4956ab5e..5ac8ee0a9e35 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -423,7 +423,7 @@ struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; spinlock_t reader_lock; /* serialize readers */ - raw_spinlock_t lock; + arch_spinlock_t lock; struct lock_class_key lock_key; struct list_head *pages; struct buffer_page *head_page; /* read from head */ @@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->buffer = buffer; spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); - cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + cpu_buffer->lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); -- cgit v1.2.2 From edc35bd72e2079b25f99c5da7d7a65dbbffc4a26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Dec 2009 12:38:57 +0100 Subject: locking: Rename __RAW_SPIN_LOCK_UNLOCKED to __ARCH_SPIN_LOCK_UNLOCKED Further name space cleanup. No functional change Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5ac8ee0a9e35..fb7a0fa508b9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->buffer = buffer; spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); - cpu_buffer->lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); -- cgit v1.2.2 From 0199c4e68d1f02894bdefe4b5d9e9ee4aedd8d62 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Dec 2009 20:01:25 +0100 Subject: locking: Convert __raw_spin* functions to arch_spin* Name space cleanup. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- kernel/trace/ring_buffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index fb7a0fa508b9..f58c9ad15830 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2834,7 +2834,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) int ret; local_irq_save(flags); - __raw_spin_lock(&cpu_buffer->lock); + arch_spin_lock(&cpu_buffer->lock); again: /* @@ -2923,7 +2923,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: - __raw_spin_unlock(&cpu_buffer->lock); + arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return reader; @@ -3286,9 +3286,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) synchronize_sched(); spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - __raw_spin_lock(&cpu_buffer->lock); + arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); - __raw_spin_unlock(&cpu_buffer->lock); + arch_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return iter; @@ -3408,11 +3408,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) goto out; - __raw_spin_lock(&cpu_buffer->lock); + arch_spin_lock(&cpu_buffer->lock); rb_reset_cpu(cpu_buffer); - __raw_spin_unlock(&cpu_buffer->lock); + arch_spin_unlock(&cpu_buffer->lock); out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); -- cgit v1.2.2 From 5ded3dc6a3c7549b36a8ac27bbd81b33756a2c29 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 6 Jan 2010 17:12:07 -0800 Subject: ring-buffer: Wrap a list.next reference with rb_list_head() This reference at the end of rb_get_reader_page() was causing off-by-one writes to the prev pointer of the page after the reader page when that page is the head page, and therefore the reader page has the RB_PAGE_HEAD flag in its list.next pointer. This eventually results in a GPF in a subsequent call to rb_set_head_page() (usually from rb_get_reader_page()) when that prev pointer is dereferenced. The dereferenced register would characteristically have an address that appears shifted left by one byte (eg, ffxxxxxxxxxxxxyy instead of ffffxxxxxxxxxxxx) due to being written at an address one byte too high. Signed-off-by: David Sharp LKML-Reference: <1262826727-9090-1-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..d5b7308b7e1b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2906,7 +2906,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * * Now make the new head point back to the reader page. */ - reader->list.next->prev = &cpu_buffer->reader_page->list; + rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; rb_inc_page(cpu_buffer, &cpu_buffer->head_page); /* Finally update the reader page to the new head */ -- cgit v1.2.2 From 0e1ff5d72a6393f2ef5dbf74f58bb55a12d63834 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jan 2010 20:40:44 -0500 Subject: ring-buffer: Add rb_list_head() wrapper around new reader page next field If the very unlikely case happens where the writer moves the head by one between where the head page is read and where the new reader page is assigned _and_ the writer then writes and wraps the entire ring buffer so that the head page is back to what was originally read as the head page, the page to be swapped will have a corrupted next pointer. Simple solution is to wrap the assignment of the next pointer with a rb_list_head(). Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d5b7308b7e1b..edefe3b2801b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2869,7 +2869,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Splice the empty reader page into the list around the head. */ reader = rb_set_head_page(cpu_buffer); - cpu_buffer->reader_page->list.next = reader->list.next; + cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); cpu_buffer->reader_page->list.prev = reader->list.prev; /* -- cgit v1.2.2 From 492a74f4210e15f4701422e2e1c4cd3c1e45ddae Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Jan 2010 15:17:47 -0500 Subject: ring-buffer: Check if ring buffer iterator has stale data Usually reads of the ring buffer is performed by a single task. There are two types of reads from the ring buffer. One is a consuming read which will consume the entry that was read and the next read will be the entry that follows. The other is an iterator that will let the user read the contents of the ring buffer without modifying it. When an iterator is allocated, writes to the ring buffer are disabled to protect the iterator. The problem exists when consuming reads happen while an iterator is allocated. Specifically, the kind of read that swaps out an entire page (used by splice) and replaces it with a new read. If the iterator is on the page that is swapped out, then the next read may read from this swapped out page and return garbage. This patch adds a check when reading the iterator to make sure that the iterator contents are still valid. If a consuming read has taken place, the iterator is reset. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index edefe3b2801b..503b630e0bda 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -464,6 +464,8 @@ struct ring_buffer_iter { struct ring_buffer_per_cpu *cpu_buffer; unsigned long head; struct buffer_page *head_page; + struct buffer_page *cache_reader_page; + unsigned long cache_read; u64 read_stamp; }; @@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; + iter->cache_reader_page = cpu_buffer->reader_page; + iter->cache_read = cpu_buffer->read; } /** @@ -3066,6 +3070,15 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; + /* + * Check if someone performed a consuming read to + * the buffer. A consuming read invalidates the iterator + * and we need to reset the iterator in this case. + */ + if (unlikely(iter->cache_read != cpu_buffer->read || + iter->cache_reader_page != cpu_buffer->reader_page)) + rb_iter_reset(iter); + again: /* * We repeat when a timestamp is encountered. -- cgit v1.2.2 From 3c05d7482777f15e71bb4cb1ba78dee2800dfec6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 26 Jan 2010 16:14:08 -0500 Subject: ring-buffer: Check for end of page in iterator If the iterator comes to an empty page for some reason, or if the page is emptied by a consuming read. The iterator code currently does not check if the iterator is pass the contents, and may return a false entry. This patch adds a check to the ring buffer iterator to test if the current page has been completely read and sets the iterator to the next page if necessary. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 503b630e0bda..8c1b2d290718 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3064,9 +3064,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_event *event; int nr_loops = 0; - if (ring_buffer_iter_empty(iter)) - return NULL; - cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; @@ -3080,6 +3077,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) rb_iter_reset(iter); again: + if (ring_buffer_iter_empty(iter)) + return NULL; + /* * We repeat when a timestamp is encountered. * We can get multiple timestamps by nested interrupts or also @@ -3094,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) if (rb_per_cpu_empty(cpu_buffer)) return NULL; + if (iter->head >= local_read(&iter->head_page->page->commit)) { + rb_inc_iter(iter); + goto again; + } + event = rb_iter_head_event(iter); switch (event->type_len) { -- cgit v1.2.2