From f6195aa09e618d712f52bf4fa33b5293820eb93d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Sep 2010 12:23:12 -0400 Subject: ring-buffer: Place duplicate expression into a single function While discussing the strictness of the 80 character limit on the Kernel Summit Discussion mailing list, I showed examples that I broke that limit slightly with some algorithms. In discussing with John Linville, what looked better, I realized that two of the 80 char breaking culprits were an identical expression. As a clean up, this patch moves the identical expression into its own helper function and that is used instead. As a side effect, the offending code is now under the 80 character limit. :-) This clean up code also changes the expression from (A - B) - C to A - (B + C) This makes the code look a little nicer too. Cc: John W. Linville Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 19cccc3c3028..ef27017caa56 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); +/* + * The total entries in the ring buffer is the running counter + * of entries entered into the ring buffer, minus the sum of + * the entries read from the ring buffer and the number of + * entries that were overwritten. + */ +static inline unsigned long +rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) +{ + return local_read(&cpu_buffer->entries) - + (local_read(&cpu_buffer->overrun) + cpu_buffer->read); +} + /** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer @@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) - - cpu_buffer->read; - return ret; + return rb_num_of_entries(cpu_buffer); } EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); @@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - entries += (local_read(&cpu_buffer->entries) - - local_read(&cpu_buffer->overrun)) - cpu_buffer->read; + entries += rb_num_of_entries(cpu_buffer); } return entries; -- cgit v1.2.2 From 6038f373a3dc1f1c26496e60b6c40b164716f07e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 15 Aug 2010 18:52:59 +0200 Subject: llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. 
Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...) | nested_open(...) ) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... +.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... 
+.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... + .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... +.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann Cc: Julia Lawall Cc: Christoph Hellwig --- kernel/trace/ring_buffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 492197e2f86c..3aea966d16de 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3965,6 +3965,7 @@ static const struct file_operations rb_simple_fops = { .open = tracing_open_generic, .read = rb_simple_read, .write = rb_simple_write, + .llseek = default_llseek, }; -- cgit v1.2.2 From 747e94ae3d1b4c9bf5380e569f614eb9040b79e7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 8 Oct 2010 13:51:48 -0400 Subject: ring-buffer: Make write slow path out of line Gcc inlines the slow path of the ring buffer write which can hurt performance. 
This patch simply forces the slow path function rb_move_tail() to always be a function. The ring_buffer_benchmark module with reader_disabled=1 shows that this patch changes the time to record an event from 135 ns to 132 ns. (3 ns or 2.22% improvement) Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bca96377fd4e..0b88df849a59 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1823,7 +1823,10 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, local_sub(length, &tail_page->write); } -static struct ring_buffer_event * +/* + * This is the slow path, force gcc not to inline it. + */ +static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long length, unsigned long tail, struct buffer_page *tail_page, u64 *ts) @@ -1943,7 +1946,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, tail = write - length; /* See if we shot pass the end of this buffer page */ - if (write > BUF_PAGE_SIZE) + if (unlikely(write > BUF_PAGE_SIZE)) return rb_move_tail(cpu_buffer, length, tail, tail_page, ts); -- cgit v1.2.2 From e8bc43e84fada397af1b677b07dbf26e6ac78fcc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Oct 2010 10:58:02 -0400 Subject: ring-buffer: Pass timestamp by value and not by reference The original code for the ring buffer had locations that modified the timestamp and that change was used by the callers. Now, the timestamp is not reused by the callers and there is no reason to pass it by reference. By changing the call to pass by value, lets gcc optimize the code a bit more where it can store the timestamp in a register and not worry about updating the reference. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0b88df849a59..c8ce6bde7fa4 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1829,7 +1829,7 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long length, unsigned long tail, - struct buffer_page *tail_page, u64 *ts) + struct buffer_page *tail_page, u64 ts) { struct buffer_page *commit_page = cpu_buffer->commit_page; struct ring_buffer *buffer = cpu_buffer->buffer; @@ -1912,8 +1912,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * Nested commits always have zero deltas, so * just reread the time stamp */ - *ts = rb_time_stamp(buffer); - next_page->page->time_stamp = *ts; + ts = rb_time_stamp(buffer); + next_page->page->time_stamp = ts; } out_again: @@ -1932,7 +1932,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, - unsigned type, unsigned long length, u64 *ts) + unsigned type, unsigned long length, u64 ts) { struct buffer_page *tail_page; struct ring_buffer_event *event; @@ -1965,7 +1965,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * its timestamp. 
*/ if (!tail) - tail_page->page->time_stamp = *ts; + tail_page->page->time_stamp = ts; return event; } @@ -2008,7 +2008,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, static int rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, - u64 *ts, u64 *delta) + u64 ts, u64 *delta) { struct ring_buffer_event *event; int ret; @@ -2016,7 +2016,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, WARN_ONCE(*delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", (unsigned long long)*delta, - (unsigned long long)*ts, + (unsigned long long)ts, (unsigned long long)cpu_buffer->write_stamp); /* @@ -2051,7 +2051,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, event->array[0] = 0; } } - cpu_buffer->write_stamp = *ts; + cpu_buffer->write_stamp = ts; /* let the caller know this was the commit */ ret = 1; } else { @@ -2175,7 +2175,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { - commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); + commit = rb_add_time_stamp(cpu_buffer, ts, &delta); if (commit == -EBUSY) goto out_fail; @@ -2187,7 +2187,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, } get_event: - event = __rb_reserve_next(cpu_buffer, 0, length, &ts); + event = __rb_reserve_next(cpu_buffer, 0, length, ts); if (unlikely(PTR_ERR(event) == -EAGAIN)) goto again; -- cgit v1.2.2 From f25106aeab7408394b9dd707e5ecf557e269c723 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Oct 2010 12:40:12 -0400 Subject: ring-buffer: Pass delta by value and not by reference The delta between events is passed to the timestamp code by reference and the timestamp code will reset the value. But it can be reset from the caller. No need to pass it in by reference. By changing the call to pass by value, lets gcc optimize the code a bit more where it can store the delta in a register and not worry about updating the reference. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c8ce6bde7fa4..3af77cd47f21 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2008,14 +2008,14 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, static int rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, - u64 ts, u64 *delta) + u64 ts, u64 delta) { struct ring_buffer_event *event; int ret; - WARN_ONCE(*delta > (1ULL << 59), + WARN_ONCE(delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", - (unsigned long long)*delta, + (unsigned long long)delta, (unsigned long long)ts, (unsigned long long)cpu_buffer->write_stamp); @@ -2041,8 +2041,8 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, * and if we can't just make it zero. 
*/ if (rb_event_index(event)) { - event->time_delta = *delta & TS_MASK; - event->array[0] = *delta >> TS_SHIFT; + event->time_delta = delta & TS_MASK; + event->array[0] = delta >> TS_SHIFT; } else { /* try to discard, since we do not need this */ if (!rb_try_to_discard(cpu_buffer, event)) { @@ -2064,8 +2064,6 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, ret = 0; } - *delta = 0; - return ret; } @@ -2175,7 +2173,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { - commit = rb_add_time_stamp(cpu_buffer, ts, &delta); + commit = rb_add_time_stamp(cpu_buffer, ts, delta); + delta = 0; + if (commit == -EBUSY) goto out_fail; -- cgit v1.2.2 From 69d1b839f7eee347e357b3f6cce7f630cc6ff93d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 7 Oct 2010 18:18:05 -0400 Subject: ring-buffer: Bind time extend and data events together When the time between two timestamps is greater than 2^27 nanosecs (~134 ms) a time extend event is added that extends the time difference to 59 bits (~18 years). This is due to events only having a 27 bit field to store time. Currently this time extend is a separate event. We add it just before the event data that is being written to the buffer. But before the event data is committed, the event data can also be discarded (as with the case of filters). But because the time extend has already been committed, it will stay in the buffer. If lots of events are being filtered and no event is being written, then every 134ms a time extend can be added to the buffer without any data attached. To keep from filling the entire buffer with time extends, a time extend will never be the first event in a page because the page timestamp can be used. Time extends can only fill the rest of a page with some data at the beginning. This patch binds the time extend with the data. The difference here is that the time extend is not committed before the data is added. Instead, when a time extend is needed, the space reserved on the ring buffer is the time extend + the data event size. The time extend is added to the first part of the reserved block and the data is added to the second. The time extend event is passed back to the reserver, but since the reserver also uses a function to find the data portion of the reserved block, no changes to the ring buffer interface need to be made. When a commit is discarded, we now remove both the time extend and the event. With this approach no more than one time extend can be in the buffer in a row. Data must always follow a time extend. Thanks to Mathieu Desnoyers for suggesting this idea. 
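For illustration, here is a minimal user-space sketch of the layout this change creates: a time extend and its data event share one reserved block, the oversized delta is split across the extend's 27-bit time_delta field and array[0], and the bound data event is found by skipping the fixed extend length. The TS_SHIFT, TS_MASK and RB_LEN_TIME_EXTEND values and the simplified event structure are assumptions made for the sketch, not definitions copied from the patch.

#include <stdint.h>
#include <stdio.h>

/* Assumed constants for the sketch; the real ones live in ring_buffer.c. */
#define TS_SHIFT		27
#define TS_MASK			((1ULL << TS_SHIFT) - 1)
#define RB_LEN_TIME_EXTEND	8

struct rb_event {			/* simplified stand-in for ring_buffer_event */
	uint32_t type_len   : 5;
	uint32_t time_delta : 27;
	uint32_t array[1];
};

/* Split a large delta across the 27-bit field and array[0]. */
static void pack_time_extend(struct rb_event *ev, uint64_t delta)
{
	ev->time_delta = delta & TS_MASK;
	ev->array[0]   = (uint32_t)(delta >> TS_SHIFT);
}

/* The bound data event sits immediately after the time extend. */
static struct rb_event *skip_extend(struct rb_event *ev)
{
	return (struct rb_event *)((char *)ev + RB_LEN_TIME_EXTEND);
}

int main(void)
{
	struct rb_event block[2] = { { 0 } };	/* [0] = time extend, [1] = data */
	uint64_t delta = 200ULL * 1000 * 1000;	/* 200 ms, above the ~134 ms limit */

	pack_time_extend(&block[0], delta);
	printf("low 27 bits %u, high bits %u, data event at +%td bytes\n",
	       (unsigned)block[0].time_delta, (unsigned)block[0].array[0],
	       (char *)skip_extend(&block[0]) - (char *)block);
	return 0;
}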
Suggested-by: Mathieu Desnoyers Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 266 ++++++++++++++++++++++++--------------------- 1 file changed, 142 insertions(+), 124 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3af77cd47f21..f50f43107e93 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -224,6 +224,9 @@ enum { RB_LEN_TIME_STAMP = 16, }; +#define skip_time_extend(event) \ + ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) + static inline int rb_null_event(struct ring_buffer_event *event) { return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; @@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) return length + RB_EVNT_HDR_SIZE; } -/* inline for ring buffer fast paths */ -static unsigned +/* + * Return the length of the given event. Will return + * the length of the time extend if the event is a + * time extend. + */ +static inline unsigned rb_event_length(struct ring_buffer_event *event) { switch (event->type_len) { @@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) return 0; } +/* + * Return total length of time extend and data, + * or just the event length for all other events. + */ +static inline unsigned +rb_event_ts_length(struct ring_buffer_event *event) +{ + unsigned len = 0; + + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { + /* time extends include the data event after it */ + len = RB_LEN_TIME_EXTEND; + event = skip_time_extend(event); + } + return len + rb_event_length(event); +} + /** * ring_buffer_event_length - return the length of the event * @event: the event to get the length of + * + * Returns the size of the data load of a data event. + * If the event is something other than a data event, it + * returns the size of the event itself. With the exception + * of a TIME EXTEND, where it still returns the size of the + * data load of the data event after it. */ unsigned ring_buffer_event_length(struct ring_buffer_event *event) { - unsigned length = rb_event_length(event); + unsigned length; + + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) + event = skip_time_extend(event); + + length = rb_event_length(event); if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) return length; length -= RB_EVNT_HDR_SIZE; @@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); static void * rb_event_data(struct ring_buffer_event *event) { + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) + event = skip_time_extend(event); BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ if (event->type_len) @@ -1546,6 +1583,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) iter->head = 0; } +/* Slow path, do not inline */ +static noinline struct ring_buffer_event * +rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) +{ + event->type_len = RINGBUF_TYPE_TIME_EXTEND; + + /* Not the first event on the page? */ + if (rb_event_index(event)) { + event->time_delta = delta & TS_MASK; + event->array[0] = delta >> TS_SHIFT; + } else { + /* nope, just zero it */ + event->time_delta = 0; + event->array[0] = 0; + } + + return skip_time_extend(event); +} + /** * ring_buffer_update_event - update event type and data * @event: the even to update @@ -1558,28 +1614,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) * data field. 
*/ static void -rb_update_event(struct ring_buffer_event *event, - unsigned type, unsigned length) +rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event, unsigned length, + int add_timestamp, u64 delta) { - event->type_len = type; - - switch (type) { - - case RINGBUF_TYPE_PADDING: - case RINGBUF_TYPE_TIME_EXTEND: - case RINGBUF_TYPE_TIME_STAMP: - break; + /* Only a commit updates the timestamp */ + if (unlikely(!rb_event_is_commit(cpu_buffer, event))) + delta = 0; - case 0: - length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) - event->array[0] = length; - else - event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); - break; - default: - BUG(); + /* + * If we need to add a timestamp, then we + * add it to the start of the resevered space. + */ + if (unlikely(add_timestamp)) { + event = rb_add_time_stamp(event, delta); + length -= RB_LEN_TIME_EXTEND; + delta = 0; } + + event->time_delta = delta; + length -= RB_EVNT_HDR_SIZE; + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { + event->type_len = 0; + event->array[0] = length; + } else + event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); } /* @@ -1932,12 +1991,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, - unsigned type, unsigned long length, u64 ts) + unsigned long length, u64 ts, + u64 delta, int add_timestamp) { struct buffer_page *tail_page; struct ring_buffer_event *event; unsigned long tail, write; + /* + * If the time delta since the last event is too big to + * hold in the time field of the event, then we append a + * TIME EXTEND event ahead of the data event. + */ + if (unlikely(add_timestamp)) + length += RB_LEN_TIME_EXTEND; + tail_page = cpu_buffer->tail_page; write = local_add_return(length, &tail_page->write); @@ -1954,11 +2022,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); kmemcheck_annotate_bitfield(event, bitfield); - rb_update_event(event, type, length); + rb_update_event(cpu_buffer, event, length, add_timestamp, delta); - /* The passed in type is zero for DATA */ - if (likely(!type)) - local_inc(&tail_page->entries); + local_inc(&tail_page->entries); /* * If this is the first commit on the page, then update @@ -1980,7 +2046,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, unsigned long addr; new_index = rb_event_index(event); - old_index = new_index + rb_event_length(event); + old_index = new_index + rb_event_ts_length(event); addr = (unsigned long)event; addr &= PAGE_MASK; @@ -2006,67 +2072,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, return 0; } -static int -rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, - u64 ts, u64 delta) -{ - struct ring_buffer_event *event; - int ret; - - WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", - (unsigned long long)delta, - (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp); - - /* - * The delta is too big, we to add a - * new timestamp. - */ - event = __rb_reserve_next(cpu_buffer, - RINGBUF_TYPE_TIME_EXTEND, - RB_LEN_TIME_EXTEND, - ts); - if (!event) - return -EBUSY; - - if (PTR_ERR(event) == -EAGAIN) - return -EAGAIN; - - /* Only a commited time event can update the write stamp */ - if (rb_event_is_commit(cpu_buffer, event)) { - /* - * If this is the first on the page, then it was - * updated with the page itself. 
Try to discard it - * and if we can't just make it zero. - */ - if (rb_event_index(event)) { - event->time_delta = delta & TS_MASK; - event->array[0] = delta >> TS_SHIFT; - } else { - /* try to discard, since we do not need this */ - if (!rb_try_to_discard(cpu_buffer, event)) { - /* nope, just zero it */ - event->time_delta = 0; - event->array[0] = 0; - } - } - cpu_buffer->write_stamp = ts; - /* let the caller know this was the commit */ - ret = 1; - } else { - /* Try to discard the event */ - if (!rb_try_to_discard(cpu_buffer, event)) { - /* Darn, this is just wasted space */ - event->time_delta = 0; - event->array[0] = 0; - } - ret = 0; - } - - return ret; -} - static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) { local_inc(&cpu_buffer->committing); @@ -2111,9 +2116,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, unsigned long length) { struct ring_buffer_event *event; - u64 ts, delta = 0; - int commit = 0; + u64 ts, delta; int nr_loops = 0; + int add_timestamp; rb_start_commit(cpu_buffer); @@ -2134,6 +2139,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, length = rb_calculate_event_length(length); again: + add_timestamp = 0; + delta = 0; + /* * We allow for interrupts to reenter here and do a trace. * If one does, it will cause this original code to loop @@ -2172,33 +2180,24 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { - - commit = rb_add_time_stamp(cpu_buffer, ts, delta); - delta = 0; - - if (commit == -EBUSY) - goto out_fail; - - if (commit == -EAGAIN) - goto again; - - RB_WARN_ON(cpu_buffer, commit < 0); + WARN_ONCE(delta > (1ULL << 59), + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + (unsigned long long)delta, + (unsigned long long)ts, + (unsigned long long)cpu_buffer->write_stamp); + add_timestamp = 1; } } get_event: - event = __rb_reserve_next(cpu_buffer, 0, length, ts); + event = __rb_reserve_next(cpu_buffer, length, ts, + delta, add_timestamp); if (unlikely(PTR_ERR(event) == -EAGAIN)) goto again; if (!event) goto out_fail; - if (!rb_event_is_commit(cpu_buffer, event)) - delta = 0; - - event->time_delta = delta; - return event; out_fail: @@ -2311,12 +2310,28 @@ static void rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { + u64 delta; + /* * The event first in the commit queue updates the * time stamp. 
*/ - if (rb_event_is_commit(cpu_buffer, event)) - cpu_buffer->write_stamp += event->time_delta; + if (rb_event_is_commit(cpu_buffer, event)) { + /* + * A commit event that is first on a page + * updates the write timestamp with the page stamp + */ + if (!rb_event_index(event)) + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; + else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { + delta = event->array[0]; + delta <<= TS_SHIFT; + delta += event->time_delta; + cpu_buffer->write_stamp += delta; + } else + cpu_buffer->write_stamp += event->time_delta; + } } static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, @@ -2356,6 +2371,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); static inline void rb_event_discard(struct ring_buffer_event *event) { + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) + event = skip_time_extend(event); + /* array[0] holds the actual length for the discarded event */ event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; event->type_len = RINGBUF_TYPE_PADDING; @@ -3043,12 +3061,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, again: /* - * We repeat when a timestamp is encountered. It is possible - * to get multiple timestamps from an interrupt entering just - * as one timestamp is about to be written, or from discarded - * commits. The most that we can have is the number on a single page. + * We repeat when a time extend is encountered. + * Since the time extend is always attached to a data event, + * we should never loop more than once. + * (We never hit the following condition more than twice). */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) return NULL; reader = rb_get_reader_page(cpu_buffer); @@ -3124,14 +3142,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; /* - * We repeat when a timestamp is encountered. - * We can get multiple timestamps by nested interrupts or also - * if filtering is on (discarding commits). Since discarding - * commits can be frequent we can get a lot of timestamps. - * But we limit them by not adding timestamps if they begin - * at the start of a page. + * We repeat when a time extend is encountered. + * Since the time extend is always attached to a data event, + * we should never loop more than once. + * (We never hit the following condition more than twice). */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) return NULL; if (rb_per_cpu_empty(cpu_buffer)) @@ -3829,7 +3845,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, if (len > (commit - read)) len = (commit - read); - size = rb_event_length(event); + /* Always keep the time extend and data together */ + size = rb_event_ts_length(event); if (len < size) goto out_unlock; @@ -3851,7 +3868,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, break; event = rb_reader_event(cpu_buffer); - size = rb_event_length(event); + /* Always keep the time extend and data together */ + size = rb_event_ts_length(event); } while (len > size); /* update bpage */ -- cgit v1.2.2 From 140ff89127c74b1b1c1b0152a36ea3720ccf6bc3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 8 Oct 2010 10:50:30 -0400 Subject: ring-buffer: Remove condition to add timestamp in fast path There's a condition to check if we should add a time extend or not in the fast path. 
But this condition is racey (in the sense that we can add a unnecessary time extend, but nothing that can break anything). We later check if the time or event time delta should be zero or have real data in it (not racey), making this first check redundant. This check may help save space once in a while, but really is not worth the hassle to try to save some space that happens at most 134 ms at a time. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f50f43107e93..d9f3e7a82137 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2119,6 +2119,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, u64 ts, delta; int nr_loops = 0; int add_timestamp; + u64 diff; rb_start_commit(cpu_buffer); @@ -2155,29 +2156,13 @@ rb_reserve_next_event(struct ring_buffer *buffer, goto out_fail; ts = rb_time_stamp(cpu_buffer->buffer); + diff = ts - cpu_buffer->write_stamp; - /* - * Only the first commit can update the timestamp. - * Yes there is a race here. If an interrupt comes in - * just after the conditional and it traces too, then it - * will also check the deltas. More than one timestamp may - * also be made. But only the entry that did the actual - * commit will be something other than zero. - */ - if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && - rb_page_write(cpu_buffer->tail_page) == - rb_commit_index(cpu_buffer))) { - u64 diff; - - diff = ts - cpu_buffer->write_stamp; - - /* make sure this diff is calculated here */ - barrier(); - - /* Did the write stamp get updated already? */ - if (unlikely(ts < cpu_buffer->write_stamp)) - goto get_event; + /* make sure this diff is calculated here */ + barrier(); + /* Did the write stamp get updated already? */ + if (likely(ts >= cpu_buffer->write_stamp)) { delta = diff; if (unlikely(test_time_stamp(delta))) { WARN_ONCE(delta > (1ULL << 59), @@ -2189,7 +2174,6 @@ rb_reserve_next_event(struct ring_buffer *buffer, } } - get_event: event = __rb_reserve_next(cpu_buffer, length, ts, delta, add_timestamp); if (unlikely(PTR_ERR(event) == -EAGAIN)) -- cgit v1.2.2 From d9abde2138e0a00a0d7e44676928efa0ef629d48 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 19 Oct 2010 13:17:08 -0400 Subject: ring-buffer: Micro-optimize with some strategic inlining By using inline and noinline, we are able to make the fast path of recording an event 4% faster. 
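The pattern, sketched here in plain user-space C with invented names (nothing below is kernel code), is the same one the earlier noinline rb_move_tail() change relied on: keep the common case in a small inline function with the overflow branch marked unlikely, and force the rarely taken path out of line so gcc keeps the fast path compact.

#include <stddef.h>
#include <string.h>

#define BUF_SIZE 4096

static char buf[BUF_SIZE];
static size_t tail;

/* Rare overflow path, deliberately kept out of line. */
static __attribute__((noinline)) char *reserve_slow(size_t len)
{
	if (len > BUF_SIZE)
		return NULL;
	tail = len;		/* wrap back to the start of the buffer */
	return buf;
}

/* Hot path: one bounds check and a pointer bump, inlined at every call site. */
static inline char *reserve(size_t len)
{
	size_t old = tail;

	if (__builtin_expect(old + len > BUF_SIZE, 0))
		return reserve_slow(len);
	tail = old + len;
	return buf + old;
}

int main(void)
{
	char *p = reserve(16);

	if (p)
		memset(p, 0, 16);
	return 0;
}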
Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d9f3e7a82137..f5007d0d932d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2078,7 +2078,7 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) local_inc(&cpu_buffer->commits); } -static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) +static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long commits; @@ -2193,13 +2193,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, #define TRACE_RECURSIVE_DEPTH 16 -static int trace_recursive_lock(void) +/* Keep this code out of the fast path cache */ +static noinline void trace_recursive_fail(void) { - current->trace_recursion++; - - if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) - return 0; - /* Disable all tracing before we do anything else */ tracing_off_permanent(); @@ -2211,10 +2207,21 @@ static int trace_recursive_lock(void) in_nmi()); WARN_ON_ONCE(1); +} + +static inline int trace_recursive_lock(void) +{ + current->trace_recursion++; + + if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + return 0; + + trace_recursive_fail(); + return -1; } -static void trace_recursive_unlock(void) +static inline void trace_recursive_unlock(void) { WARN_ON_ONCE(!current->trace_recursion); -- cgit v1.2.2 From b8b2663bd7c9da04ac804659b9f617c199d0252c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 19 Oct 2010 13:23:25 -0400 Subject: ring-buffer: Remove unused macro RB_TIMESTAMPS_PER_PAGE With the binding of time extends to events we no longer need to use the macro RB_TIMESTAMPS_PER_PAGE. Remove it. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f5007d0d932d..ad25490f8b40 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -441,9 +441,6 @@ static inline int test_time_stamp(u64 delta) /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) -/* Max number of timestamps that can fit on a page */ -#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND) - int ring_buffer_print_page_header(struct trace_seq *s) { struct buffer_data_page field; -- cgit v1.2.2 From e1e359273576ee8fe27021356b064c772ed29af3 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 22 Dec 2010 16:38:24 -0800 Subject: ring_buffer: Off-by-one and duplicate events in ring_buffer_read_page Fix two related problems in the event-copying loop of ring_buffer_read_page. The loop condition for copying events is off-by-one. "len" is the remaining space in the caller-supplied page. "size" is the size of the next event (or two events). If len == size, then there is just enough space for the next event. size was set to rb_event_ts_length, which may include the size of two events if the first event is a time-extend, in order to assure time- extends are kept together with the event after it. However, rb_advance_reader always advances by one event. This would result in the event after any time-extend being duplicated. Instead, get the size of a single event for the memcpy, but use rb_event_ts_length for the loop condition. 
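A simplified user-space model of the corrected loop follows; the event layout and the two length helpers are stand-ins rather than the kernel functions. The loop condition uses the combined extend-plus-data length so the pair is never split across the output page, while each memcpy advances by exactly one event, matching how the reader itself advances.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct event {
	uint8_t is_time_extend;	/* 1 = must stay glued to the next event */
	uint8_t payload;
};

static size_t event_len(const struct event *e)
{
	(void)e;
	return sizeof(struct event);
}

/* Single event length, or extend plus the data event bound to it. */
static size_t event_ts_len(const struct event *e)
{
	return e->is_time_extend ? 2 * sizeof(struct event) : sizeof(struct event);
}

static size_t copy_events(char *dst, size_t len, const char *src, size_t commit)
{
	size_t rpos = 0, pos = 0;
	const struct event *e = (const struct event *)src;
	size_t size = event_ts_len(e);

	if (len < size)
		return 0;		/* no room even for the first extend+data pair */

	do {
		size = event_len(e);		/* copy and advance one event at a time */
		memcpy(dst + pos, src + rpos, size);
		pos  += size;
		rpos += size;
		len  -= size;

		if (rpos >= commit)
			break;
		e = (const struct event *)(src + rpos);
		size = event_ts_len(e);		/* look ahead: keep extend and data together */
	} while (len >= size);			/* ">=" is the off-by-one fix */

	return pos;
}

int main(void)
{
	struct event src[3] = { { 1, 0 }, { 0, 'a' }, { 0, 'b' } };
	char dst[sizeof(src)];

	printf("copied %zu bytes\n",
	       copy_events(dst, sizeof(dst), (const char *)src, sizeof(src)));
	return 0;
}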
Signed-off-by: David Sharp LKML-Reference: <1293064704-8101-1-git-send-email-dhsharp@google.com> LKML-Reference: Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9ed509a015d8..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, /* Need to copy one event at a time */ do { + /* We need the size of one event, because + * rb_advance_reader only advances by one event, + * whereas rb_event_ts_length may include the size of + * one or two events. + * We have already ensured there's enough space if this + * is a time extend. */ + size = rb_event_length(event); memcpy(bpage->data + pos, rpage->data + rpos, size); len -= size; @@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, event = rb_reader_event(cpu_buffer); /* Always keep the time extend and data together */ size = rb_event_ts_length(event); - } while (len > size); + } while (len >= size); /* update bpage */ local_set(&bpage->commit, pos); -- cgit v1.2.2 From 42b16b3fbb5ee4555f5dee6220f3ccaa6e1ebe47 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 17 Jan 2011 00:09:38 +0100 Subject: =?UTF-8?q?Kill=20off=20warning:=20=E2=80=98inline=E2=80=99=20is?= =?UTF-8?q?=20not=20at=20beginning=20of=20declaration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bunch of warning: ‘inline’ is not at beginning of declaration messages when building a 'make allyesconfig' kernel with -Wextra. These warnings are trivial to kill, yet rather annoying when building with -Wextra. The more we can cut down on pointless crap like this the better (IMHO). A previous patch to do this for a 'allnoconfig' build has already been merged. This just takes the cleanup a little further. Signed-off-by: Jesper Juhl Signed-off-by: Jiri Kosina --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a4fbcc..6ee56b4ad136 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -669,7 +669,7 @@ static struct list_head *rb_list_head(struct list_head *list) * the reader page). But if the next page is a header page, * its flags will be non zero. */ -static int inline +static inline int rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *page, struct list_head *list) { -- cgit v1.2.2 From 5e38ca8f3ea423442eaafe1b7e206084aa38120a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Feb 2011 13:28:18 +0100 Subject: tracing: Add unstable sched clock note to the warning The warning "Delta way too big" warning might appear on a system with unstable shed clock right after the system is resumed and tracing was enabled during the suspend. Since it's not realy bug, and the unstable sched clock is working fast and reliable otherwise, Steven suggested to keep using the sched clock in any case and just to make note in the warning itself. 
Signed-off-by: Jiri Olsa LKML-Reference: <1296649698-6003-1-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a4fbcc..7739893a1d0a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2163,10 +2163,14 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp); + (unsigned long long)cpu_buffer->write_stamp, + sched_clock_stable ? "" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); add_timestamp = 1; } } -- cgit v1.2.2 From e9345aab675382176740bc8a2c6d3caf1510e46d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Feb 2011 08:09:49 +0100 Subject: Revert "tracing: Add unstable sched clock note to the warning" This reverts commit 5e38ca8f3ea423442eaafe1b7e206084aa38120a. Breaks the build of several !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures. Cc: Jiri Olsa Cc: Steven Rostedt Message-ID: <20110217171823.GB17058@elte.hu> Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7739893a1d0a..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2163,14 +2163,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp, - sched_clock_stable ? "" : - "If you just came from a suspend/resume,\n" - "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n"); + (unsigned long long)cpu_buffer->write_stamp); add_timestamp = 1; } } -- cgit v1.2.2 From 750912fa366312e9c5bc83eab352898a26750401 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 8 Dec 2010 13:46:47 -0800 Subject: tracing: Add an 'overwrite' trace_option. Add an "overwrite" trace_option for ftrace to control whether the buffer should be overwritten on overflow or not. The default remains to overwrite old events when the buffer is full. This patch adds the option to instead discard newest events when the buffer is full. This is useful to get a snapshot of traces just after enabling traces. Dropping the current event is also a simpler code path. 
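Since the change is exposed as an ftrace trace_option, user space can toggle it through the tracing options directory (typically a file named "overwrite" under options/). In kernel code, a caller could use the new helper roughly as below; this is only a sketch of plausible wiring that assumes the declaration is exported through linux/ring_buffer.h, and the snapshot helper names are placeholders, not code from the trace.c side of the patch.

#include <linux/ring_buffer.h>

/* Sketch only: placeholder tracer helpers, not upstream code. */
static void snapshot_mode_start(struct ring_buffer *buffer)
{
	/* 0 = stop overwriting: discard newest events once the buffer fills */
	ring_buffer_change_overwrite(buffer, 0);
	ring_buffer_record_enable(buffer);
}

static void snapshot_mode_stop(struct ring_buffer *buffer)
{
	ring_buffer_record_disable(buffer);
	ring_buffer_change_overwrite(buffer, 1);	/* back to flight-recorder mode */
}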
Signed-off-by: David Sharp LKML-Reference: <1291844807-15481-1-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a4fbcc..269db80a961e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1429,6 +1429,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) } EXPORT_SYMBOL_GPL(ring_buffer_resize); +void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) +{ + mutex_lock(&buffer->mutex); + if (val) + buffer->flags |= RB_FL_OVERWRITE; + else + buffer->flags &= ~RB_FL_OVERWRITE; + mutex_unlock(&buffer->mutex); +} +EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); + static inline void * __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) { -- cgit v1.2.2 From de29be5e712dc8b7eef2bef9417af3bb6a88e47a Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:16 -0800 Subject: ring-buffer: Remove unused #include Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-3-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 269db80a961e..3237d961d905 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5,7 +5,6 @@ */ #include #include -#include #include #include #include -- cgit v1.2.2 From 31274d72f01604f4b02d933b4f3cac84d2c201fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Feb 2011 15:52:19 +0100 Subject: tracing: Explain about unstable clock on resume with ring buffer warning The "Delta way too big" warning might appear on a system with a unstable shed clock right after the system is resumed and tracing was enabled at time of suspend. Since it's not realy a bug, and the unstable sched clock is working fast and reliable otherwise, Steven suggested to keep using the sched clock in any case and just to make note in the warning itself. v2 changes: - added #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK Signed-off-by: Jiri Olsa LKML-Reference: <20110218145219.GD2604@jolsa.brq.redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3237d961d905..db7b439d23ee 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2172,11 +2172,19 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (likely(ts >= cpu_buffer->write_stamp)) { delta = diff; if (unlikely(test_time_stamp(delta))) { + int local_clock_stable = 1; +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + local_clock_stable = sched_clock_stable; +#endif WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp); + (unsigned long long)cpu_buffer->write_stamp, + local_clock_stable ? 
"" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); add_timestamp = 1; } } -- cgit v1.2.2 From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d9c8bcafb120..0ef7b4b2a1f7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1478,7 +1478,7 @@ static inline unsigned long rb_page_entries(struct buffer_page *bpage) return local_read(&bpage->entries) & RB_WRITE_MASK; } -/* Size is determined by what has been commited */ +/* Size is determined by what has been committed */ static inline unsigned rb_page_size(struct buffer_page *bpage) { return rb_page_commit(bpage); @@ -2932,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) /* * cpu_buffer->pages just needs to point to the buffer, it * has no specific buffer page to point to. Lets move it out - * of our way so we don't accidently swap it. + * of our way so we don't accidentally swap it. */ cpu_buffer->pages = reader->list.prev; -- cgit v1.2.2 From b1cff0ad1062621ae63cb6c5dc4165191fe2e9f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2011 14:27:43 -0400 Subject: ftrace: Add internal recursive checks Witold reported a reboot caused by the selftests of the dynamic function tracer. He sent me a config and I used ktest to do a config_bisect on it (as my config did not cause the crash). It pointed out that the problem config was CONFIG_PROVE_RCU. What happened was that if multiple callbacks are attached to the function tracer, we iterate a list of callbacks. Because the list is managed by synchronize_sched() and preempt_disable, the access to the pointers uses rcu_dereference_raw(). When PROVE_RCU is enabled, the rcu_dereference_raw() calls some debugging functions, which happen to be traced. The tracing of the debug function would then call rcu_dereference_raw() which would then call the debug function and then... well you get the idea. I first wrote two different patches to solve this bug. 1) add a __rcu_dereference_raw() that would not do any checks. 2) add notrace to the offending debug functions. Both of these patches worked. Talking with Paul McKenney on IRC, he suggested to add recursion detection instead. This seemed to be a better solution, so I decided to implement it. As the task_struct already has a trace_recursion to detect recursion in the ring buffer, and that has a very small number it allows, I decided to use that same variable to add flags that can detect the recursion inside the infrastructure of the function tracer. I plan to change it so that the task struct bit can be checked in mcount, but as that requires changes to all archs, I will hold that off to the next merge window. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Frederic Weisbecker Cc: Paul E. 
McKenney Link: http://lkml.kernel.org/r/1306348063.1465.116.camel@gandalf.stny.rr.com Reported-by: Witold Baryluk Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0ef7b4b2a1f7..b0c7aa407943 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void) printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" "HC[%lu]:SC[%lu]:NMI[%lu]\n", - current->trace_recursion, + trace_recursion_buffer(), hardirq_count() >> HARDIRQ_SHIFT, softirq_count() >> SOFTIRQ_SHIFT, in_nmi()); @@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void) static inline int trace_recursive_lock(void) { - current->trace_recursion++; + trace_recursion_inc(); - if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) return 0; trace_recursive_fail(); @@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void) static inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!current->trace_recursion); + WARN_ON_ONCE(!trace_recursion_buffer()); - current->trace_recursion--; + trace_recursion_dec(); } #else -- cgit v1.2.2
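To make the idea behind this last change concrete, here is a small user-space model of an accessor-wrapped recursion counter. The helper names, mask and depth limit are assumptions for illustration, not the kernel's trace_recursion_* definitions: the point is that the depth lives in the low bits of a single per-task word, leaving the upper bits free for the function tracer's new recursion flags.

#include <assert.h>

#define DEPTH_MASK	0xffUL	/* assumed: low byte holds the ring-buffer depth */
#define MAX_DEPTH	16

/* Stand-in for current->trace_recursion. */
static unsigned long task_trace_recursion;

static inline unsigned long recursion_buffer(void)
{
	return task_trace_recursion & DEPTH_MASK;
}

static inline void recursion_inc(void)
{
	task_trace_recursion++;
}

static inline void recursion_dec(void)
{
	task_trace_recursion--;
}

static inline int recursive_lock(void)
{
	recursion_inc();
	if (recursion_buffer() < MAX_DEPTH)
		return 0;
	recursion_dec();	/* model choice: undo on failure so the counter stays balanced */
	return -1;		/* too deep; the caller must bail out */
}

static inline void recursive_unlock(void)
{
	assert(recursion_buffer() != 0);
	recursion_dec();
}

int main(void)
{
	if (!recursive_lock()) {
		/* ... reserve and commit a ring buffer event ... */
		recursive_unlock();
	}
	return 0;
}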