Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--   kernel/trace/ring_buffer.c   398
1 file changed, 221 insertions, 177 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bca96377fd4e..b0c7aa407943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@
  */
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
-#include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
@@ -224,6 +223,9 @@ enum {
         RB_LEN_TIME_STAMP = 16,
 };
 
+#define skip_time_extend(event) \
+        ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
         return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
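
The skip_time_extend() macro added here steps a fixed RB_LEN_TIME_EXTEND bytes past a time-extend record to reach the data event bound to it. A minimal user-space sketch of that pointer arithmetic, using stand-in types and assuming RB_LEN_TIME_EXTEND is 8 bytes (the size of the event header); this is an illustration, not code from the patch:

#include <stdio.h>
#include <stdint.h>

/* stand-in for the kernel's ring_buffer_event header layout */
struct rb_event {
        uint32_t type_len:5, time_delta:27;
        uint32_t array[];
};

#define RB_LEN_TIME_EXTEND 8    /* assumed length of a time-extend record */
#define skip_time_extend(event) \
        ((struct rb_event *)((char *)(event) + RB_LEN_TIME_EXTEND))

int main(void)
{
        unsigned char page[64] = {0};
        struct rb_event *extend = (struct rb_event *)page;
        struct rb_event *data = skip_time_extend(extend);

        /* the bound data event begins right after the time extend */
        printf("data event offset: %td\n", (char *)data - (char *)page);
        return 0;
}

Run, this prints an offset of 8: the data event always sits immediately after its time extend, which is what lets the two be handled as one unit in the rest of this patch.
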
@@ -248,8 +250,12 @@ rb_event_data_length(struct ring_buffer_event *event)
         return length + RB_EVNT_HDR_SIZE;
 }
 
-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
         switch (event->type_len) {
@@ -274,13 +280,41 @@ rb_event_length(struct ring_buffer_event *event)
         return 0;
 }
 
+/*
+ * Return total length of time extend and data,
+ * or just the event length for all other events.
+ */
+static inline unsigned
+rb_event_ts_length(struct ring_buffer_event *event)
+{
+        unsigned len = 0;
+
+        if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+                /* time extends include the data event after it */
+                len = RB_LEN_TIME_EXTEND;
+                event = skip_time_extend(event);
+        }
+        return len + rb_event_length(event);
+}
+
 /**
  * ring_buffer_event_length - return the length of the event
  * @event: the event to get the length of
+ *
+ * Returns the size of the data load of a data event.
+ * If the event is something other than a data event, it
+ * returns the size of the event itself. With the exception
+ * of a TIME EXTEND, where it still returns the size of the
+ * data load of the data event after it.
  */
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
-        unsigned length = rb_event_length(event);
+        unsigned length;
+
+        if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+                event = skip_time_extend(event);
+
+        length = rb_event_length(event);
         if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
                 return length;
         length -= RB_EVNT_HDR_SIZE;
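
A worked reading of the two helpers above: for a plain data event, rb_event_ts_length() and rb_event_length() agree; when a time extend is bound to a data event, rb_event_ts_length() reports RB_LEN_TIME_EXTEND plus the length of the data event behind it (8 + 32 = 40 bytes for a 32-byte data record, assuming the 8-byte extend), while ring_buffer_event_length() first skips the extend and still reports only the data payload, so callers see no change in the event size they are handed.
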
@@ -294,6 +328,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
+        if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+                event = skip_time_extend(event);
         BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
         /* If length is in len field, then array[0] has the data */
         if (event->type_len)
@@ -404,9 +440,6 @@ static inline int test_time_stamp(u64 delta)
 /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
 #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
 
-/* Max number of timestamps that can fit on a page */
-#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
-
 int ring_buffer_print_page_header(struct trace_seq *s)
 {
         struct buffer_data_page field;
@@ -635,7 +668,7 @@ static struct list_head *rb_list_head(struct list_head *list)
  * the reader page). But if the next page is a header page,
  * its flags will be non zero.
  */
-static int inline
+static inline int
 rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
                 struct buffer_page *page, struct list_head *list)
 {
@@ -1395,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_resize);
 
+void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
+{
+        mutex_lock(&buffer->mutex);
+        if (val)
+                buffer->flags |= RB_FL_OVERWRITE;
+        else
+                buffer->flags &= ~RB_FL_OVERWRITE;
+        mutex_unlock(&buffer->mutex);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
+
 static inline void *
 __rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
 {
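
Since ring_buffer_change_overwrite() just toggles RB_FL_OVERWRITE under the buffer mutex, a caller that wants to flip an existing buffer between overwrite mode and stop-when-full mode would use it roughly like this (hypothetical call site for illustration; the real users live in the tracing core, not in this diff):

        ring_buffer_change_overwrite(buffer, 1);   /* old events may be overwritten */
        ring_buffer_change_overwrite(buffer, 0);   /* writers stop when the buffer fills */
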
@@ -1434,7 +1478,7 @@ static inline unsigned long rb_page_entries(struct buffer_page *bpage)
         return local_read(&bpage->entries) & RB_WRITE_MASK;
 }
 
-/* Size is determined by what has been commited */
+/* Size is determined by what has been committed */
 static inline unsigned rb_page_size(struct buffer_page *bpage)
 {
         return rb_page_commit(bpage);
@@ -1546,6 +1590,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
         iter->head = 0;
 }
 
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+{
+        event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+
+        /* Not the first event on the page? */
+        if (rb_event_index(event)) {
+                event->time_delta = delta & TS_MASK;
+                event->array[0] = delta >> TS_SHIFT;
+        } else {
+                /* nope, just zero it */
+                event->time_delta = 0;
+                event->array[0] = 0;
+        }
+
+        return skip_time_extend(event);
+}
+
 /**
  * ring_buffer_update_event - update event type and data
  * @event: the even to update
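
A worked example of the split performed above, assuming the usual 27-bit time_delta field (TS_SHIFT == 27, TS_MASK == (1 << 27) - 1): a delta of 0x12345678 does not fit in 27 bits, so rb_add_time_stamp() stores the low bits, 0x2345678, in time_delta and the high bits, 0x12345678 >> 27 == 2, in array[0]; a reader reassembles (2 << 27) + 0x2345678 == 0x12345678. When the event is the first on a page, both fields are zeroed instead, because the page's own time_stamp already carries the absolute time.
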
@@ -1558,28 +1621,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
          * data field.
          */
 static void
-rb_update_event(struct ring_buffer_event *event,
-                unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+                struct ring_buffer_event *event, unsigned length,
+                int add_timestamp, u64 delta)
 {
-        event->type_len = type;
-
-        switch (type) {
-
-        case RINGBUF_TYPE_PADDING:
-        case RINGBUF_TYPE_TIME_EXTEND:
-        case RINGBUF_TYPE_TIME_STAMP:
-                break;
+        /* Only a commit updates the timestamp */
+        if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+                delta = 0;
 
-        case 0:
-                length -= RB_EVNT_HDR_SIZE;
-                if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
-                        event->array[0] = length;
-                else
-                        event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-                break;
-        default:
-                BUG();
+        /*
+         * If we need to add a timestamp, then we
+         * add it to the start of the resevered space.
+         */
+        if (unlikely(add_timestamp)) {
+                event = rb_add_time_stamp(event, delta);
+                length -= RB_LEN_TIME_EXTEND;
+                delta = 0;
         }
+
+        event->time_delta = delta;
+        length -= RB_EVNT_HDR_SIZE;
+        if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+                event->type_len = 0;
+                event->array[0] = length;
+        } else
+                event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 }
 
 /*
@@ -1823,10 +1889,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
         local_sub(length, &tail_page->write);
 }
 
-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
              unsigned long length, unsigned long tail,
-             struct buffer_page *tail_page, u64 *ts)
+             struct buffer_page *tail_page, u64 ts)
 {
         struct buffer_page *commit_page = cpu_buffer->commit_page;
         struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1978,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
                  * Nested commits always have zero deltas, so
                  * just reread the time stamp
                  */
-                *ts = rb_time_stamp(buffer);
-                next_page->page->time_stamp = *ts;
+                ts = rb_time_stamp(buffer);
+                next_page->page->time_stamp = ts;
         }
 
  out_again:
@@ -1929,12 +1998,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-                  unsigned type, unsigned long length, u64 *ts)
+                  unsigned long length, u64 ts,
+                  u64 delta, int add_timestamp)
 {
         struct buffer_page *tail_page;
         struct ring_buffer_event *event;
         unsigned long tail, write;
 
+        /*
+         * If the time delta since the last event is too big to
+         * hold in the time field of the event, then we append a
+         * TIME EXTEND event ahead of the data event.
+         */
+        if (unlikely(add_timestamp))
+                length += RB_LEN_TIME_EXTEND;
+
         tail_page = cpu_buffer->tail_page;
         write = local_add_return(length, &tail_page->write);
 
@@ -1943,7 +2021,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
         tail = write - length;
 
         /* See if we shot pass the end of this buffer page */
-        if (write > BUF_PAGE_SIZE)
+        if (unlikely(write > BUF_PAGE_SIZE))
                 return rb_move_tail(cpu_buffer, length, tail,
                                     tail_page, ts);
 
@@ -1951,18 +2029,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
         event = __rb_page_index(tail_page, tail);
         kmemcheck_annotate_bitfield(event, bitfield);
-        rb_update_event(event, type, length);
+        rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
 
-        /* The passed in type is zero for DATA */
-        if (likely(!type))
-                local_inc(&tail_page->entries);
+        local_inc(&tail_page->entries);
 
         /*
          * If this is the first commit on the page, then update
          * its timestamp.
          */
         if (!tail)
-                tail_page->page->time_stamp = *ts;
+                tail_page->page->time_stamp = ts;
 
         return event;
 }
@@ -1977,7 +2053,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
         unsigned long addr;
 
         new_index = rb_event_index(event);
-        old_index = new_index + rb_event_length(event);
+        old_index = new_index + rb_event_ts_length(event);
         addr = (unsigned long)event;
         addr &= PAGE_MASK;
 
@@ -2003,76 +2079,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
         return 0;
 }
 
-static int
-rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
-                  u64 *ts, u64 *delta)
-{
-        struct ring_buffer_event *event;
-        int ret;
-
-        WARN_ONCE(*delta > (1ULL << 59),
-                  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
-                  (unsigned long long)*delta,
-                  (unsigned long long)*ts,
-                  (unsigned long long)cpu_buffer->write_stamp);
-
-        /*
-         * The delta is too big, we to add a
-         * new timestamp.
-         */
-        event = __rb_reserve_next(cpu_buffer,
-                                  RINGBUF_TYPE_TIME_EXTEND,
-                                  RB_LEN_TIME_EXTEND,
-                                  ts);
-        if (!event)
-                return -EBUSY;
-
-        if (PTR_ERR(event) == -EAGAIN)
-                return -EAGAIN;
-
-        /* Only a commited time event can update the write stamp */
-        if (rb_event_is_commit(cpu_buffer, event)) {
-                /*
-                 * If this is the first on the page, then it was
-                 * updated with the page itself. Try to discard it
-                 * and if we can't just make it zero.
-                 */
-                if (rb_event_index(event)) {
-                        event->time_delta = *delta & TS_MASK;
-                        event->array[0] = *delta >> TS_SHIFT;
-                } else {
-                        /* try to discard, since we do not need this */
-                        if (!rb_try_to_discard(cpu_buffer, event)) {
-                                /* nope, just zero it */
-                                event->time_delta = 0;
-                                event->array[0] = 0;
-                        }
-                }
-                cpu_buffer->write_stamp = *ts;
-                /* let the caller know this was the commit */
-                ret = 1;
-        } else {
-                /* Try to discard the event */
-                if (!rb_try_to_discard(cpu_buffer, event)) {
-                        /* Darn, this is just wasted space */
-                        event->time_delta = 0;
-                        event->array[0] = 0;
-                }
-                ret = 0;
-        }
-
-        *delta = 0;
-
-        return ret;
-}
-
 static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
         local_inc(&cpu_buffer->committing);
         local_inc(&cpu_buffer->commits);
 }
 
-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
         unsigned long commits;
 
@@ -2110,9 +2123,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
                           unsigned long length)
 {
         struct ring_buffer_event *event;
-        u64 ts, delta = 0;
-        int commit = 0;
+        u64 ts, delta;
         int nr_loops = 0;
+        int add_timestamp;
+        u64 diff;
 
         rb_start_commit(cpu_buffer);
 
@@ -2133,6 +2147,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
         length = rb_calculate_event_length(length);
  again:
+        add_timestamp = 0;
+        delta = 0;
+
         /*
          * We allow for interrupts to reenter here and do a trace.
          * If one does, it will cause this original code to loop
@@ -2146,56 +2163,40 @@ rb_reserve_next_event(struct ring_buffer *buffer,
                 goto out_fail;
 
         ts = rb_time_stamp(cpu_buffer->buffer);
+        diff = ts - cpu_buffer->write_stamp;
 
-        /*
-         * Only the first commit can update the timestamp.
-         * Yes there is a race here. If an interrupt comes in
-         * just after the conditional and it traces too, then it
-         * will also check the deltas. More than one timestamp may
-         * also be made. But only the entry that did the actual
-         * commit will be something other than zero.
-         */
-        if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
-                   rb_page_write(cpu_buffer->tail_page) ==
-                   rb_commit_index(cpu_buffer))) {
-                u64 diff;
-
-                diff = ts - cpu_buffer->write_stamp;
-
-                /* make sure this diff is calculated here */
-                barrier();
-
-                /* Did the write stamp get updated already? */
-                if (unlikely(ts < cpu_buffer->write_stamp))
-                        goto get_event;
+        /* make sure this diff is calculated here */
+        barrier();
 
+        /* Did the write stamp get updated already? */
+        if (likely(ts >= cpu_buffer->write_stamp)) {
                 delta = diff;
                 if (unlikely(test_time_stamp(delta))) {
-
-                        commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-                        if (commit == -EBUSY)
-                                goto out_fail;
-
-                        if (commit == -EAGAIN)
-                                goto again;
-
-                        RB_WARN_ON(cpu_buffer, commit < 0);
+                        int local_clock_stable = 1;
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+                        local_clock_stable = sched_clock_stable;
+#endif
+                        WARN_ONCE(delta > (1ULL << 59),
+                                  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
+                                  (unsigned long long)delta,
+                                  (unsigned long long)ts,
+                                  (unsigned long long)cpu_buffer->write_stamp,
+                                  local_clock_stable ? "" :
+                                  "If you just came from a suspend/resume,\n"
+                                  "please switch to the trace global clock:\n"
+                                  "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
+                        add_timestamp = 1;
                 }
         }
 
- get_event:
-        event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+        event = __rb_reserve_next(cpu_buffer, length, ts,
+                                  delta, add_timestamp);
         if (unlikely(PTR_ERR(event) == -EAGAIN))
                 goto again;
 
         if (!event)
                 goto out_fail;
 
-        if (!rb_event_is_commit(cpu_buffer, event))
-                delta = 0;
-
-        event->time_delta = delta;
-
         return event;
 
  out_fail:
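
The reserve path above now unconditionally computes diff = ts - write_stamp and only asks for a time extend when the delta cannot be represented in the event itself. A small stand-alone model of that decision follows; it is a simplified sketch, not kernel code: compute_delta and cpu_buf are made-up names, a plain struct stands in for the per-cpu buffer, and the 27-bit limit checked by test_time_stamp() is assumed.

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT        27                      /* assumed, as in the ring buffer */
#define TS_MASK         ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST   (~TS_MASK)

struct cpu_buf { uint64_t write_stamp; };

/* returns the delta to store in the event and whether a time extend is needed */
static uint64_t compute_delta(struct cpu_buf *cb, uint64_t ts, int *add_timestamp)
{
        uint64_t delta = 0;

        *add_timestamp = 0;
        if (ts >= cb->write_stamp) {            /* stamp not already moved past us */
                delta = ts - cb->write_stamp;
                if (delta & TS_DELTA_TEST)      /* does not fit in 27 bits */
                        *add_timestamp = 1;
        }
        return delta;
}

int main(void)
{
        struct cpu_buf cb = { .write_stamp = 1000 };
        int add;
        uint64_t d = compute_delta(&cb, 1000 + (1ULL << 30), &add);

        printf("delta=%llu add_timestamp=%d\n", (unsigned long long)d, add);
        return 0;
}

Requesting the extend up front, instead of reserving a separate timestamp event as the deleted rb_add_time_stamp() did, is what lets __rb_reserve_next() lay out the extend and its data event in a single reservation.
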
@@ -2207,32 +2208,39 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #define TRACE_RECURSIVE_DEPTH 16
 
-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
 {
-        current->trace_recursion++;
-
-        if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
-                return 0;
-
         /* Disable all tracing before we do anything else */
         tracing_off_permanent();
 
         printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
                     "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-                    current->trace_recursion,
+                    trace_recursion_buffer(),
                     hardirq_count() >> HARDIRQ_SHIFT,
                     softirq_count() >> SOFTIRQ_SHIFT,
                     in_nmi());
 
         WARN_ON_ONCE(1);
+}
+
+static inline int trace_recursive_lock(void)
+{
+        trace_recursion_inc();
+
+        if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
+                return 0;
+
+        trace_recursive_fail();
+
         return -1;
 }
 
-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
 {
-        WARN_ON_ONCE(!current->trace_recursion);
+        WARN_ON_ONCE(!trace_recursion_buffer());
 
-        current->trace_recursion--;
+        trace_recursion_dec();
 }
 
 #else
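
The recursion guard now funnels the unlikely failure case through the out-of-line trace_recursive_fail(), while the fast path stays an increment plus one compare. A minimal user-space model of that inc/check/dec pattern, with a plain global standing in for the per-task recursion counter and stand-in function names:

#include <stdio.h>

#define TRACE_RECURSIVE_DEPTH 16

static unsigned long recursion;         /* stands in for the per-task counter */

static void recursive_fail(void)        /* slow path, normally never taken */
{
        fprintf(stderr, "tracing recursion too deep: %lu\n", recursion);
}

static int recursive_lock(void)
{
        recursion++;
        if (recursion < TRACE_RECURSIVE_DEPTH)
                return 0;
        recursive_fail();
        return -1;
}

static void recursive_unlock(void)
{
        recursion--;
}

int main(void)
{
        if (!recursive_lock()) {
                /* the traced region would go here */
                recursive_unlock();
        }
        printf("recursion counter back to %lu\n", recursion);
        return 0;
}
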
@@ -2308,12 +2316,28 @@ static void
 rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                       struct ring_buffer_event *event)
 {
+        u64 delta;
+
         /*
          * The event first in the commit queue updates the
          * time stamp.
          */
-        if (rb_event_is_commit(cpu_buffer, event))
-                cpu_buffer->write_stamp += event->time_delta;
+        if (rb_event_is_commit(cpu_buffer, event)) {
+                /*
+                 * A commit event that is first on a page
+                 * updates the write timestamp with the page stamp
+                 */
+                if (!rb_event_index(event))
+                        cpu_buffer->write_stamp =
+                                cpu_buffer->commit_page->page->time_stamp;
+                else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+                        delta = event->array[0];
+                        delta <<= TS_SHIFT;
+                        delta += event->time_delta;
+                        cpu_buffer->write_stamp += delta;
+                } else
+                        cpu_buffer->write_stamp += event->time_delta;
+        }
 }
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
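
Continuing the worked numbers from the time-extend example earlier (same 27-bit assumption): when the committed event is a TIME_EXTEND carrying array[0] == 2 and time_delta == 0x2345678, the hunk above reconstructs delta = (2 << 27) + 0x2345678 = 0x12345678 and adds it to write_stamp; a first-on-page commit instead resynchronises write_stamp from the page's own time_stamp, and any other event still just adds its 27-bit time_delta.
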
@@ -2353,6 +2377,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
+        if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+                event = skip_time_extend(event);
+
         /* array[0] holds the actual length for the discarded event */
         event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
         event->type_len = RINGBUF_TYPE_PADDING;
@@ -2606,6 +2633,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
 
+/*
+ * The total entries in the ring buffer is the running counter
+ * of entries entered into the ring buffer, minus the sum of
+ * the entries read from the ring buffer and the number of
+ * entries that were overwritten.
+ */
+static inline unsigned long
+rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
+{
+        return local_read(&cpu_buffer->entries) -
+                (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
+}
+
 /**
  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
  * @buffer: The ring buffer
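
The helper makes the bookkeeping explicit; for example, with 1000 events written into a cpu buffer, 200 of them overwritten and 300 already consumed by a reader, rb_num_of_entries() reports 1000 - (200 + 300) = 500 entries still readable.
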
@@ -2614,16 +2654,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 {
         struct ring_buffer_per_cpu *cpu_buffer;
-        unsigned long ret;
 
         if (!cpumask_test_cpu(cpu, buffer->cpumask))
                 return 0;
 
         cpu_buffer = buffer->buffers[cpu];
-        ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
-                - cpu_buffer->read;
 
-        return ret;
+        return rb_num_of_entries(cpu_buffer);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
 
@@ -2684,8 +2721,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
         /* if you care about this being correct, lock the buffer */
         for_each_buffer_cpu(buffer, cpu) {
                 cpu_buffer = buffer->buffers[cpu];
-                entries += (local_read(&cpu_buffer->entries) -
-                            local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
+                entries += rb_num_of_entries(cpu_buffer);
         }
 
         return entries;
@@ -2896,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
         /*
          * cpu_buffer->pages just needs to point to the buffer, it
          * has no specific buffer page to point to. Lets move it out
-         * of our way so we don't accidently swap it.
+         * of our way so we don't accidentally swap it.
          */
         cpu_buffer->pages = reader->list.prev;
 
@@ -3040,12 +3076,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 
  again:
         /*
-         * We repeat when a timestamp is encountered. It is possible
-         * to get multiple timestamps from an interrupt entering just
-         * as one timestamp is about to be written, or from discarded
-         * commits. The most that we can have is the number on a single page.
+         * We repeat when a time extend is encountered.
+         * Since the time extend is always attached to a data event,
+         * we should never loop more than once.
+         * (We never hit the following condition more than twice).
          */
-        if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
                 return NULL;
 
         reader = rb_get_reader_page(cpu_buffer);
@@ -3121,14 +3157,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
                 return NULL;
 
         /*
-         * We repeat when a timestamp is encountered.
-         * We can get multiple timestamps by nested interrupts or also
-         * if filtering is on (discarding commits). Since discarding
-         * commits can be frequent we can get a lot of timestamps.
-         * But we limit them by not adding timestamps if they begin
-         * at the start of a page.
+         * We repeat when a time extend is encountered.
+         * Since the time extend is always attached to a data event,
+         * we should never loop more than once.
+         * (We never hit the following condition more than twice).
          */
-        if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
                 return NULL;
 
         if (rb_per_cpu_empty(cpu_buffer))
@@ -3826,7 +3860,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
         if (len > (commit - read))
                 len = (commit - read);
 
-        size = rb_event_length(event);
+        /* Always keep the time extend and data together */
+        size = rb_event_ts_length(event);
 
         if (len < size)
                 goto out_unlock;
@@ -3836,6 +3871,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 
         /* Need to copy one event at a time */
         do {
+                /* We need the size of one event, because
+                 * rb_advance_reader only advances by one event,
+                 * whereas rb_event_ts_length may include the size of
+                 * one or two events.
+                 * We have already ensured there's enough space if this
+                 * is a time extend. */
+                size = rb_event_length(event);
                 memcpy(bpage->data + pos, rpage->data + rpos, size);
 
                 len -= size;
@@ -3848,8 +3890,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                         break;
 
                 event = rb_reader_event(cpu_buffer);
-                size = rb_event_length(event);
-        } while (len > size);
+                /* Always keep the time extend and data together */
+                size = rb_event_ts_length(event);
+        } while (len >= size);
 
         /* update bpage */
         local_set(&bpage->commit, pos);
@@ -3965,6 +4008,7 @@ static const struct file_operations rb_simple_fops = {
         .open           = tracing_open_generic,
         .read           = rb_simple_read,
         .write          = rb_simple_write,
+        .llseek         = default_llseek,
 };
 
 