Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 398
1 file changed, 221 insertions(+), 177 deletions(-)
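This patch reworks how oversized timestamp deltas are stored: the standalone time-stamp events go away, and a TIME_EXTEND header is instead reserved immediately in front of the data event it times, with readers treating the pair as one unit. A minimal sketch of that layout follows; the 5-bit/27-bit event header, the 8-byte RB_LEN_TIME_EXTEND and the type value 30 are not visible in this diff and are assumptions from mainline, and the helper names are illustrative only.

#include <stdint.h>

/* Assumed shape of the ring buffer event header (linux/ring_buffer.h). */
struct rb_event {
	uint32_t type_len:5;	/* RINGBUF_TYPE_TIME_EXTEND marks an extend    */
	uint32_t time_delta:27;	/* low 27 bits of the timestamp delta          */
	uint32_t array[];	/* for an extend, array[0] = delta >> TS_SHIFT */
};

#define RB_LEN_TIME_EXTEND	8	/* assumed extend header size (mainline) */
#define TS_SHIFT		27

/* Mirrors the skip_time_extend() macro added below: the data event
 * starts RB_LEN_TIME_EXTEND bytes past its time extend header. */
static inline struct rb_event *skip_time_extend(struct rb_event *event)
{
	return (struct rb_event *)((char *)event + RB_LEN_TIME_EXTEND);
}

/* Mirrors rb_add_time_stamp(): split the delta across the 27-bit
 * time_delta field and array[0], then return the data event slot.
 * (The real helper additionally zeroes the delta when the extend is
 * the first event on a page, which this sketch omits.) */
static inline struct rb_event *add_time_extend(struct rb_event *event, uint64_t delta)
{
	event->type_len = 30;	/* RINGBUF_TYPE_TIME_EXTEND, assumed value */
	event->time_delta = delta & ((1ULL << TS_SHIFT) - 1);
	event->array[0] = (uint32_t)(delta >> TS_SHIFT);
	return skip_time_extend(event);
}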
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bca96377fd4e..b0c7aa407943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@ | |||
5 | */ | 5 | */ |
6 | #include <linux/ring_buffer.h> | 6 | #include <linux/ring_buffer.h> |
7 | #include <linux/trace_clock.h> | 7 | #include <linux/trace_clock.h> |
8 | #include <linux/ftrace_irq.h> | ||
9 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
10 | #include <linux/debugfs.h> | 9 | #include <linux/debugfs.h> |
11 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
@@ -224,6 +223,9 @@ enum { | |||
224 | RB_LEN_TIME_STAMP = 16, | 223 | RB_LEN_TIME_STAMP = 16, |
225 | }; | 224 | }; |
226 | 225 | ||
226 | #define skip_time_extend(event) \ | ||
227 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | ||
228 | |||
227 | static inline int rb_null_event(struct ring_buffer_event *event) | 229 | static inline int rb_null_event(struct ring_buffer_event *event) |
228 | { | 230 | { |
229 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 231 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
@@ -248,8 +250,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
248 | return length + RB_EVNT_HDR_SIZE; | 250 | return length + RB_EVNT_HDR_SIZE; |
249 | } | 251 | } |
250 | 252 | ||
251 | /* inline for ring buffer fast paths */ | 253 | /* |
252 | static unsigned | 254 | * Return the length of the given event. Will return |
255 | * the length of the time extend if the event is a | ||
256 | * time extend. | ||
257 | */ | ||
258 | static inline unsigned | ||
253 | rb_event_length(struct ring_buffer_event *event) | 259 | rb_event_length(struct ring_buffer_event *event) |
254 | { | 260 | { |
255 | switch (event->type_len) { | 261 | switch (event->type_len) { |
@@ -274,13 +280,41 @@ rb_event_length(struct ring_buffer_event *event) | |||
274 | return 0; | 280 | return 0; |
275 | } | 281 | } |
276 | 282 | ||
283 | /* | ||
284 | * Return total length of time extend and data, | ||
285 | * or just the event length for all other events. | ||
286 | */ | ||
287 | static inline unsigned | ||
288 | rb_event_ts_length(struct ring_buffer_event *event) | ||
289 | { | ||
290 | unsigned len = 0; | ||
291 | |||
292 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
293 | /* time extends include the data event after it */ | ||
294 | len = RB_LEN_TIME_EXTEND; | ||
295 | event = skip_time_extend(event); | ||
296 | } | ||
297 | return len + rb_event_length(event); | ||
298 | } | ||
299 | |||
277 | /** | 300 | /** |
278 | * ring_buffer_event_length - return the length of the event | 301 | * ring_buffer_event_length - return the length of the event |
279 | * @event: the event to get the length of | 302 | * @event: the event to get the length of |
303 | * | ||
304 | * Returns the size of the data load of a data event. | ||
305 | * If the event is something other than a data event, it | ||
306 | * returns the size of the event itself. With the exception | ||
307 | * of a TIME EXTEND, where it still returns the size of the | ||
308 | * data load of the data event after it. | ||
280 | */ | 309 | */ |
281 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 310 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
282 | { | 311 | { |
283 | unsigned length = rb_event_length(event); | 312 | unsigned length; |
313 | |||
314 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
315 | event = skip_time_extend(event); | ||
316 | |||
317 | length = rb_event_length(event); | ||
284 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 318 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
285 | return length; | 319 | return length; |
286 | length -= RB_EVNT_HDR_SIZE; | 320 | length -= RB_EVNT_HDR_SIZE; |
@@ -294,6 +328,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
294 | static void * | 328 | static void * |
295 | rb_event_data(struct ring_buffer_event *event) | 329 | rb_event_data(struct ring_buffer_event *event) |
296 | { | 330 | { |
331 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
332 | event = skip_time_extend(event); | ||
297 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 333 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
298 | /* If length is in len field, then array[0] has the data */ | 334 | /* If length is in len field, then array[0] has the data */ |
299 | if (event->type_len) | 335 | if (event->type_len) |
@@ -404,9 +440,6 @@ static inline int test_time_stamp(u64 delta) | |||
404 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 440 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
405 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 441 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
406 | 442 | ||
407 | /* Max number of timestamps that can fit on a page */ | ||
408 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND) | ||
409 | |||
410 | int ring_buffer_print_page_header(struct trace_seq *s) | 443 | int ring_buffer_print_page_header(struct trace_seq *s) |
411 | { | 444 | { |
412 | struct buffer_data_page field; | 445 | struct buffer_data_page field; |
@@ -635,7 +668,7 @@ static struct list_head *rb_list_head(struct list_head *list) | |||
635 | * the reader page). But if the next page is a header page, | 668 | * the reader page). But if the next page is a header page, |
636 | * its flags will be non zero. | 669 | * its flags will be non zero. |
637 | */ | 670 | */ |
638 | static int inline | 671 | static inline int |
639 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, | 672 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, |
640 | struct buffer_page *page, struct list_head *list) | 673 | struct buffer_page *page, struct list_head *list) |
641 | { | 674 | { |
@@ -1395,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
1395 | } | 1428 | } |
1396 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | 1429 | EXPORT_SYMBOL_GPL(ring_buffer_resize); |
1397 | 1430 | ||
1431 | void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) | ||
1432 | { | ||
1433 | mutex_lock(&buffer->mutex); | ||
1434 | if (val) | ||
1435 | buffer->flags |= RB_FL_OVERWRITE; | ||
1436 | else | ||
1437 | buffer->flags &= ~RB_FL_OVERWRITE; | ||
1438 | mutex_unlock(&buffer->mutex); | ||
1439 | } | ||
1440 | EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); | ||
1441 | |||
1398 | static inline void * | 1442 | static inline void * |
1399 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | 1443 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) |
1400 | { | 1444 | { |
@@ -1434,7 +1478,7 @@ static inline unsigned long rb_page_entries(struct buffer_page *bpage) | |||
1434 | return local_read(&bpage->entries) & RB_WRITE_MASK; | 1478 | return local_read(&bpage->entries) & RB_WRITE_MASK; |
1435 | } | 1479 | } |
1436 | 1480 | ||
1437 | /* Size is determined by what has been commited */ | 1481 | /* Size is determined by what has been committed */ |
1438 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1482 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
1439 | { | 1483 | { |
1440 | return rb_page_commit(bpage); | 1484 | return rb_page_commit(bpage); |
@@ -1546,6 +1590,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1546 | iter->head = 0; | 1590 | iter->head = 0; |
1547 | } | 1591 | } |
1548 | 1592 | ||
1593 | /* Slow path, do not inline */ | ||
1594 | static noinline struct ring_buffer_event * | ||
1595 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | ||
1596 | { | ||
1597 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
1598 | |||
1599 | /* Not the first event on the page? */ | ||
1600 | if (rb_event_index(event)) { | ||
1601 | event->time_delta = delta & TS_MASK; | ||
1602 | event->array[0] = delta >> TS_SHIFT; | ||
1603 | } else { | ||
1604 | /* nope, just zero it */ | ||
1605 | event->time_delta = 0; | ||
1606 | event->array[0] = 0; | ||
1607 | } | ||
1608 | |||
1609 | return skip_time_extend(event); | ||
1610 | } | ||
1611 | |||
1549 | /** | 1612 | /** |
1550 | * ring_buffer_update_event - update event type and data | 1613 | * ring_buffer_update_event - update event type and data |
1551 | * @event: the even to update | 1614 | * @event: the even to update |
@@ -1558,28 +1621,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1558 | * data field. | 1621 | * data field. |
1559 | */ | 1622 | */ |
1560 | static void | 1623 | static void |
1561 | rb_update_event(struct ring_buffer_event *event, | 1624 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1562 | unsigned type, unsigned length) | 1625 | struct ring_buffer_event *event, unsigned length, |
1626 | int add_timestamp, u64 delta) | ||
1563 | { | 1627 | { |
1564 | event->type_len = type; | 1628 | /* Only a commit updates the timestamp */ |
1565 | 1629 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | |
1566 | switch (type) { | 1630 | delta = 0; |
1567 | |||
1568 | case RINGBUF_TYPE_PADDING: | ||
1569 | case RINGBUF_TYPE_TIME_EXTEND: | ||
1570 | case RINGBUF_TYPE_TIME_STAMP: | ||
1571 | break; | ||
1572 | 1631 | ||
1573 | case 0: | 1632 | /* |
1574 | length -= RB_EVNT_HDR_SIZE; | 1633 | * If we need to add a timestamp, then we |
1575 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) | 1634 | * add it to the start of the resevered space. |
1576 | event->array[0] = length; | 1635 | */ |
1577 | else | 1636 | if (unlikely(add_timestamp)) { |
1578 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1637 | event = rb_add_time_stamp(event, delta); |
1579 | break; | 1638 | length -= RB_LEN_TIME_EXTEND; |
1580 | default: | 1639 | delta = 0; |
1581 | BUG(); | ||
1582 | } | 1640 | } |
1641 | |||
1642 | event->time_delta = delta; | ||
1643 | length -= RB_EVNT_HDR_SIZE; | ||
1644 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | ||
1645 | event->type_len = 0; | ||
1646 | event->array[0] = length; | ||
1647 | } else | ||
1648 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1583 | } | 1649 | } |
1584 | 1650 | ||
1585 | /* | 1651 | /* |
@@ -1823,10 +1889,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1823 | local_sub(length, &tail_page->write); | 1889 | local_sub(length, &tail_page->write); |
1824 | } | 1890 | } |
1825 | 1891 | ||
1826 | static struct ring_buffer_event * | 1892 | /* |
1893 | * This is the slow path, force gcc not to inline it. | ||
1894 | */ | ||
1895 | static noinline struct ring_buffer_event * | ||
1827 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1896 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
1828 | unsigned long length, unsigned long tail, | 1897 | unsigned long length, unsigned long tail, |
1829 | struct buffer_page *tail_page, u64 *ts) | 1898 | struct buffer_page *tail_page, u64 ts) |
1830 | { | 1899 | { |
1831 | struct buffer_page *commit_page = cpu_buffer->commit_page; | 1900 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
1832 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1901 | struct ring_buffer *buffer = cpu_buffer->buffer; |
@@ -1909,8 +1978,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1909 | * Nested commits always have zero deltas, so | 1978 | * Nested commits always have zero deltas, so |
1910 | * just reread the time stamp | 1979 | * just reread the time stamp |
1911 | */ | 1980 | */ |
1912 | *ts = rb_time_stamp(buffer); | 1981 | ts = rb_time_stamp(buffer); |
1913 | next_page->page->time_stamp = *ts; | 1982 | next_page->page->time_stamp = ts; |
1914 | } | 1983 | } |
1915 | 1984 | ||
1916 | out_again: | 1985 | out_again: |
@@ -1929,12 +1998,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1929 | 1998 | ||
1930 | static struct ring_buffer_event * | 1999 | static struct ring_buffer_event * |
1931 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 2000 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
1932 | unsigned type, unsigned long length, u64 *ts) | 2001 | unsigned long length, u64 ts, |
2002 | u64 delta, int add_timestamp) | ||
1933 | { | 2003 | { |
1934 | struct buffer_page *tail_page; | 2004 | struct buffer_page *tail_page; |
1935 | struct ring_buffer_event *event; | 2005 | struct ring_buffer_event *event; |
1936 | unsigned long tail, write; | 2006 | unsigned long tail, write; |
1937 | 2007 | ||
2008 | /* | ||
2009 | * If the time delta since the last event is too big to | ||
2010 | * hold in the time field of the event, then we append a | ||
2011 | * TIME EXTEND event ahead of the data event. | ||
2012 | */ | ||
2013 | if (unlikely(add_timestamp)) | ||
2014 | length += RB_LEN_TIME_EXTEND; | ||
2015 | |||
1938 | tail_page = cpu_buffer->tail_page; | 2016 | tail_page = cpu_buffer->tail_page; |
1939 | write = local_add_return(length, &tail_page->write); | 2017 | write = local_add_return(length, &tail_page->write); |
1940 | 2018 | ||
@@ -1943,7 +2021,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1943 | tail = write - length; | 2021 | tail = write - length; |
1944 | 2022 | ||
1945 | /* See if we shot pass the end of this buffer page */ | 2023 | /* See if we shot pass the end of this buffer page */ |
1946 | if (write > BUF_PAGE_SIZE) | 2024 | if (unlikely(write > BUF_PAGE_SIZE)) |
1947 | return rb_move_tail(cpu_buffer, length, tail, | 2025 | return rb_move_tail(cpu_buffer, length, tail, |
1948 | tail_page, ts); | 2026 | tail_page, ts); |
1949 | 2027 | ||
@@ -1951,18 +2029,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1951 | 2029 | ||
1952 | event = __rb_page_index(tail_page, tail); | 2030 | event = __rb_page_index(tail_page, tail); |
1953 | kmemcheck_annotate_bitfield(event, bitfield); | 2031 | kmemcheck_annotate_bitfield(event, bitfield); |
1954 | rb_update_event(event, type, length); | 2032 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
1955 | 2033 | ||
1956 | /* The passed in type is zero for DATA */ | 2034 | local_inc(&tail_page->entries); |
1957 | if (likely(!type)) | ||
1958 | local_inc(&tail_page->entries); | ||
1959 | 2035 | ||
1960 | /* | 2036 | /* |
1961 | * If this is the first commit on the page, then update | 2037 | * If this is the first commit on the page, then update |
1962 | * its timestamp. | 2038 | * its timestamp. |
1963 | */ | 2039 | */ |
1964 | if (!tail) | 2040 | if (!tail) |
1965 | tail_page->page->time_stamp = *ts; | 2041 | tail_page->page->time_stamp = ts; |
1966 | 2042 | ||
1967 | return event; | 2043 | return event; |
1968 | } | 2044 | } |
@@ -1977,7 +2053,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1977 | unsigned long addr; | 2053 | unsigned long addr; |
1978 | 2054 | ||
1979 | new_index = rb_event_index(event); | 2055 | new_index = rb_event_index(event); |
1980 | old_index = new_index + rb_event_length(event); | 2056 | old_index = new_index + rb_event_ts_length(event); |
1981 | addr = (unsigned long)event; | 2057 | addr = (unsigned long)event; |
1982 | addr &= PAGE_MASK; | 2058 | addr &= PAGE_MASK; |
1983 | 2059 | ||
@@ -2003,76 +2079,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
2003 | return 0; | 2079 | return 0; |
2004 | } | 2080 | } |
2005 | 2081 | ||
2006 | static int | ||
2007 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
2008 | u64 *ts, u64 *delta) | ||
2009 | { | ||
2010 | struct ring_buffer_event *event; | ||
2011 | int ret; | ||
2012 | |||
2013 | WARN_ONCE(*delta > (1ULL << 59), | ||
2014 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", | ||
2015 | (unsigned long long)*delta, | ||
2016 | (unsigned long long)*ts, | ||
2017 | (unsigned long long)cpu_buffer->write_stamp); | ||
2018 | |||
2019 | /* | ||
2020 | * The delta is too big, we to add a | ||
2021 | * new timestamp. | ||
2022 | */ | ||
2023 | event = __rb_reserve_next(cpu_buffer, | ||
2024 | RINGBUF_TYPE_TIME_EXTEND, | ||
2025 | RB_LEN_TIME_EXTEND, | ||
2026 | ts); | ||
2027 | if (!event) | ||
2028 | return -EBUSY; | ||
2029 | |||
2030 | if (PTR_ERR(event) == -EAGAIN) | ||
2031 | return -EAGAIN; | ||
2032 | |||
2033 | /* Only a commited time event can update the write stamp */ | ||
2034 | if (rb_event_is_commit(cpu_buffer, event)) { | ||
2035 | /* | ||
2036 | * If this is the first on the page, then it was | ||
2037 | * updated with the page itself. Try to discard it | ||
2038 | * and if we can't just make it zero. | ||
2039 | */ | ||
2040 | if (rb_event_index(event)) { | ||
2041 | event->time_delta = *delta & TS_MASK; | ||
2042 | event->array[0] = *delta >> TS_SHIFT; | ||
2043 | } else { | ||
2044 | /* try to discard, since we do not need this */ | ||
2045 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2046 | /* nope, just zero it */ | ||
2047 | event->time_delta = 0; | ||
2048 | event->array[0] = 0; | ||
2049 | } | ||
2050 | } | ||
2051 | cpu_buffer->write_stamp = *ts; | ||
2052 | /* let the caller know this was the commit */ | ||
2053 | ret = 1; | ||
2054 | } else { | ||
2055 | /* Try to discard the event */ | ||
2056 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2057 | /* Darn, this is just wasted space */ | ||
2058 | event->time_delta = 0; | ||
2059 | event->array[0] = 0; | ||
2060 | } | ||
2061 | ret = 0; | ||
2062 | } | ||
2063 | |||
2064 | *delta = 0; | ||
2065 | |||
2066 | return ret; | ||
2067 | } | ||
2068 | |||
2069 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2082 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2070 | { | 2083 | { |
2071 | local_inc(&cpu_buffer->committing); | 2084 | local_inc(&cpu_buffer->committing); |
2072 | local_inc(&cpu_buffer->commits); | 2085 | local_inc(&cpu_buffer->commits); |
2073 | } | 2086 | } |
2074 | 2087 | ||
2075 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2088 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2076 | { | 2089 | { |
2077 | unsigned long commits; | 2090 | unsigned long commits; |
2078 | 2091 | ||
@@ -2110,9 +2123,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2110 | unsigned long length) | 2123 | unsigned long length) |
2111 | { | 2124 | { |
2112 | struct ring_buffer_event *event; | 2125 | struct ring_buffer_event *event; |
2113 | u64 ts, delta = 0; | 2126 | u64 ts, delta; |
2114 | int commit = 0; | ||
2115 | int nr_loops = 0; | 2127 | int nr_loops = 0; |
2128 | int add_timestamp; | ||
2129 | u64 diff; | ||
2116 | 2130 | ||
2117 | rb_start_commit(cpu_buffer); | 2131 | rb_start_commit(cpu_buffer); |
2118 | 2132 | ||
@@ -2133,6 +2147,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2133 | 2147 | ||
2134 | length = rb_calculate_event_length(length); | 2148 | length = rb_calculate_event_length(length); |
2135 | again: | 2149 | again: |
2150 | add_timestamp = 0; | ||
2151 | delta = 0; | ||
2152 | |||
2136 | /* | 2153 | /* |
2137 | * We allow for interrupts to reenter here and do a trace. | 2154 | * We allow for interrupts to reenter here and do a trace. |
2138 | * If one does, it will cause this original code to loop | 2155 | * If one does, it will cause this original code to loop |
@@ -2146,56 +2163,40 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2146 | goto out_fail; | 2163 | goto out_fail; |
2147 | 2164 | ||
2148 | ts = rb_time_stamp(cpu_buffer->buffer); | 2165 | ts = rb_time_stamp(cpu_buffer->buffer); |
2166 | diff = ts - cpu_buffer->write_stamp; | ||
2149 | 2167 | ||
2150 | /* | 2168 | /* make sure this diff is calculated here */ |
2151 | * Only the first commit can update the timestamp. | 2169 | barrier(); |
2152 | * Yes there is a race here. If an interrupt comes in | ||
2153 | * just after the conditional and it traces too, then it | ||
2154 | * will also check the deltas. More than one timestamp may | ||
2155 | * also be made. But only the entry that did the actual | ||
2156 | * commit will be something other than zero. | ||
2157 | */ | ||
2158 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && | ||
2159 | rb_page_write(cpu_buffer->tail_page) == | ||
2160 | rb_commit_index(cpu_buffer))) { | ||
2161 | u64 diff; | ||
2162 | |||
2163 | diff = ts - cpu_buffer->write_stamp; | ||
2164 | |||
2165 | /* make sure this diff is calculated here */ | ||
2166 | barrier(); | ||
2167 | |||
2168 | /* Did the write stamp get updated already? */ | ||
2169 | if (unlikely(ts < cpu_buffer->write_stamp)) | ||
2170 | goto get_event; | ||
2171 | 2170 | ||
2171 | /* Did the write stamp get updated already? */ | ||
2172 | if (likely(ts >= cpu_buffer->write_stamp)) { | ||
2172 | delta = diff; | 2173 | delta = diff; |
2173 | if (unlikely(test_time_stamp(delta))) { | 2174 | if (unlikely(test_time_stamp(delta))) { |
2174 | 2175 | int local_clock_stable = 1; | |
2175 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2176 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
2176 | if (commit == -EBUSY) | 2177 | local_clock_stable = sched_clock_stable; |
2177 | goto out_fail; | 2178 | #endif |
2178 | 2179 | WARN_ONCE(delta > (1ULL << 59), | |
2179 | if (commit == -EAGAIN) | 2180 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", |
2180 | goto again; | 2181 | (unsigned long long)delta, |
2181 | 2182 | (unsigned long long)ts, | |
2182 | RB_WARN_ON(cpu_buffer, commit < 0); | 2183 | (unsigned long long)cpu_buffer->write_stamp, |
2184 | local_clock_stable ? "" : | ||
2185 | "If you just came from a suspend/resume,\n" | ||
2186 | "please switch to the trace global clock:\n" | ||
2187 | " echo global > /sys/kernel/debug/tracing/trace_clock\n"); | ||
2188 | add_timestamp = 1; | ||
2183 | } | 2189 | } |
2184 | } | 2190 | } |
2185 | 2191 | ||
2186 | get_event: | 2192 | event = __rb_reserve_next(cpu_buffer, length, ts, |
2187 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); | 2193 | delta, add_timestamp); |
2188 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2194 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
2189 | goto again; | 2195 | goto again; |
2190 | 2196 | ||
2191 | if (!event) | 2197 | if (!event) |
2192 | goto out_fail; | 2198 | goto out_fail; |
2193 | 2199 | ||
2194 | if (!rb_event_is_commit(cpu_buffer, event)) | ||
2195 | delta = 0; | ||
2196 | |||
2197 | event->time_delta = delta; | ||
2198 | |||
2199 | return event; | 2200 | return event; |
2200 | 2201 | ||
2201 | out_fail: | 2202 | out_fail: |
@@ -2207,32 +2208,39 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2207 | 2208 | ||
2208 | #define TRACE_RECURSIVE_DEPTH 16 | 2209 | #define TRACE_RECURSIVE_DEPTH 16 |
2209 | 2210 | ||
2210 | static int trace_recursive_lock(void) | 2211 | /* Keep this code out of the fast path cache */ |
2212 | static noinline void trace_recursive_fail(void) | ||
2211 | { | 2213 | { |
2212 | current->trace_recursion++; | ||
2213 | |||
2214 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2215 | return 0; | ||
2216 | |||
2217 | /* Disable all tracing before we do anything else */ | 2214 | /* Disable all tracing before we do anything else */ |
2218 | tracing_off_permanent(); | 2215 | tracing_off_permanent(); |
2219 | 2216 | ||
2220 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" | 2217 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" |
2221 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", | 2218 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", |
2222 | current->trace_recursion, | 2219 | trace_recursion_buffer(), |
2223 | hardirq_count() >> HARDIRQ_SHIFT, | 2220 | hardirq_count() >> HARDIRQ_SHIFT, |
2224 | softirq_count() >> SOFTIRQ_SHIFT, | 2221 | softirq_count() >> SOFTIRQ_SHIFT, |
2225 | in_nmi()); | 2222 | in_nmi()); |
2226 | 2223 | ||
2227 | WARN_ON_ONCE(1); | 2224 | WARN_ON_ONCE(1); |
2225 | } | ||
2226 | |||
2227 | static inline int trace_recursive_lock(void) | ||
2228 | { | ||
2229 | trace_recursion_inc(); | ||
2230 | |||
2231 | if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) | ||
2232 | return 0; | ||
2233 | |||
2234 | trace_recursive_fail(); | ||
2235 | |||
2228 | return -1; | 2236 | return -1; |
2229 | } | 2237 | } |
2230 | 2238 | ||
2231 | static void trace_recursive_unlock(void) | 2239 | static inline void trace_recursive_unlock(void) |
2232 | { | 2240 | { |
2233 | WARN_ON_ONCE(!current->trace_recursion); | 2241 | WARN_ON_ONCE(!trace_recursion_buffer()); |
2234 | 2242 | ||
2235 | current->trace_recursion--; | 2243 | trace_recursion_dec(); |
2236 | } | 2244 | } |
2237 | 2245 | ||
2238 | #else | 2246 | #else |
@@ -2308,12 +2316,28 @@ static void | |||
2308 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 2316 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
2309 | struct ring_buffer_event *event) | 2317 | struct ring_buffer_event *event) |
2310 | { | 2318 | { |
2319 | u64 delta; | ||
2320 | |||
2311 | /* | 2321 | /* |
2312 | * The event first in the commit queue updates the | 2322 | * The event first in the commit queue updates the |
2313 | * time stamp. | 2323 | * time stamp. |
2314 | */ | 2324 | */ |
2315 | if (rb_event_is_commit(cpu_buffer, event)) | 2325 | if (rb_event_is_commit(cpu_buffer, event)) { |
2316 | cpu_buffer->write_stamp += event->time_delta; | 2326 | /* |
2327 | * A commit event that is first on a page | ||
2328 | * updates the write timestamp with the page stamp | ||
2329 | */ | ||
2330 | if (!rb_event_index(event)) | ||
2331 | cpu_buffer->write_stamp = | ||
2332 | cpu_buffer->commit_page->page->time_stamp; | ||
2333 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
2334 | delta = event->array[0]; | ||
2335 | delta <<= TS_SHIFT; | ||
2336 | delta += event->time_delta; | ||
2337 | cpu_buffer->write_stamp += delta; | ||
2338 | } else | ||
2339 | cpu_buffer->write_stamp += event->time_delta; | ||
2340 | } | ||
2317 | } | 2341 | } |
2318 | 2342 | ||
2319 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2343 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -2353,6 +2377,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | |||
2353 | 2377 | ||
2354 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2378 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2355 | { | 2379 | { |
2380 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
2381 | event = skip_time_extend(event); | ||
2382 | |||
2356 | /* array[0] holds the actual length for the discarded event */ | 2383 | /* array[0] holds the actual length for the discarded event */ |
2357 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2384 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
2358 | event->type_len = RINGBUF_TYPE_PADDING; | 2385 | event->type_len = RINGBUF_TYPE_PADDING; |
@@ -2606,6 +2633,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | |||
2606 | } | 2633 | } |
2607 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | 2634 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); |
2608 | 2635 | ||
2636 | /* | ||
2637 | * The total entries in the ring buffer is the running counter | ||
2638 | * of entries entered into the ring buffer, minus the sum of | ||
2639 | * the entries read from the ring buffer and the number of | ||
2640 | * entries that were overwritten. | ||
2641 | */ | ||
2642 | static inline unsigned long | ||
2643 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | ||
2644 | { | ||
2645 | return local_read(&cpu_buffer->entries) - | ||
2646 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); | ||
2647 | } | ||
2648 | |||
2609 | /** | 2649 | /** |
2610 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2650 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
2611 | * @buffer: The ring buffer | 2651 | * @buffer: The ring buffer |
@@ -2614,16 +2654,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
2614 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 2654 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
2615 | { | 2655 | { |
2616 | struct ring_buffer_per_cpu *cpu_buffer; | 2656 | struct ring_buffer_per_cpu *cpu_buffer; |
2617 | unsigned long ret; | ||
2618 | 2657 | ||
2619 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2658 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2620 | return 0; | 2659 | return 0; |
2621 | 2660 | ||
2622 | cpu_buffer = buffer->buffers[cpu]; | 2661 | cpu_buffer = buffer->buffers[cpu]; |
2623 | ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) | ||
2624 | - cpu_buffer->read; | ||
2625 | 2662 | ||
2626 | return ret; | 2663 | return rb_num_of_entries(cpu_buffer); |
2627 | } | 2664 | } |
2628 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 2665 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
2629 | 2666 | ||
@@ -2684,8 +2721,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2684 | /* if you care about this being correct, lock the buffer */ | 2721 | /* if you care about this being correct, lock the buffer */ |
2685 | for_each_buffer_cpu(buffer, cpu) { | 2722 | for_each_buffer_cpu(buffer, cpu) { |
2686 | cpu_buffer = buffer->buffers[cpu]; | 2723 | cpu_buffer = buffer->buffers[cpu]; |
2687 | entries += (local_read(&cpu_buffer->entries) - | 2724 | entries += rb_num_of_entries(cpu_buffer); |
2688 | local_read(&cpu_buffer->overrun)) - cpu_buffer->read; | ||
2689 | } | 2725 | } |
2690 | 2726 | ||
2691 | return entries; | 2727 | return entries; |
@@ -2896,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2896 | /* | 2932 | /* |
2897 | * cpu_buffer->pages just needs to point to the buffer, it | 2933 | * cpu_buffer->pages just needs to point to the buffer, it |
2898 | * has no specific buffer page to point to. Lets move it out | 2934 | * has no specific buffer page to point to. Lets move it out |
2899 | * of our way so we don't accidently swap it. | 2935 | * of our way so we don't accidentally swap it. |
2900 | */ | 2936 | */ |
2901 | cpu_buffer->pages = reader->list.prev; | 2937 | cpu_buffer->pages = reader->list.prev; |
2902 | 2938 | ||
@@ -3040,12 +3076,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | |||
3040 | 3076 | ||
3041 | again: | 3077 | again: |
3042 | /* | 3078 | /* |
3043 | * We repeat when a timestamp is encountered. It is possible | 3079 | * We repeat when a time extend is encountered. |
3044 | * to get multiple timestamps from an interrupt entering just | 3080 | * Since the time extend is always attached to a data event, |
3045 | * as one timestamp is about to be written, or from discarded | 3081 | * we should never loop more than once. |
3046 | * commits. The most that we can have is the number on a single page. | 3082 | * (We never hit the following condition more than twice). |
3047 | */ | 3083 | */ |
3048 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3084 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3049 | return NULL; | 3085 | return NULL; |
3050 | 3086 | ||
3051 | reader = rb_get_reader_page(cpu_buffer); | 3087 | reader = rb_get_reader_page(cpu_buffer); |
@@ -3121,14 +3157,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3121 | return NULL; | 3157 | return NULL; |
3122 | 3158 | ||
3123 | /* | 3159 | /* |
3124 | * We repeat when a timestamp is encountered. | 3160 | * We repeat when a time extend is encountered. |
3125 | * We can get multiple timestamps by nested interrupts or also | 3161 | * Since the time extend is always attached to a data event, |
3126 | * if filtering is on (discarding commits). Since discarding | 3162 | * we should never loop more than once. |
3127 | * commits can be frequent we can get a lot of timestamps. | 3163 | * (We never hit the following condition more than twice). |
3128 | * But we limit them by not adding timestamps if they begin | ||
3129 | * at the start of a page. | ||
3130 | */ | 3164 | */ |
3131 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3165 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3132 | return NULL; | 3166 | return NULL; |
3133 | 3167 | ||
3134 | if (rb_per_cpu_empty(cpu_buffer)) | 3168 | if (rb_per_cpu_empty(cpu_buffer)) |
@@ -3826,7 +3860,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3826 | if (len > (commit - read)) | 3860 | if (len > (commit - read)) |
3827 | len = (commit - read); | 3861 | len = (commit - read); |
3828 | 3862 | ||
3829 | size = rb_event_length(event); | 3863 | /* Always keep the time extend and data together */ |
3864 | size = rb_event_ts_length(event); | ||
3830 | 3865 | ||
3831 | if (len < size) | 3866 | if (len < size) |
3832 | goto out_unlock; | 3867 | goto out_unlock; |
@@ -3836,6 +3871,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3836 | 3871 | ||
3837 | /* Need to copy one event at a time */ | 3872 | /* Need to copy one event at a time */ |
3838 | do { | 3873 | do { |
3874 | /* We need the size of one event, because | ||
3875 | * rb_advance_reader only advances by one event, | ||
3876 | * whereas rb_event_ts_length may include the size of | ||
3877 | * one or two events. | ||
3878 | * We have already ensured there's enough space if this | ||
3879 | * is a time extend. */ | ||
3880 | size = rb_event_length(event); | ||
3839 | memcpy(bpage->data + pos, rpage->data + rpos, size); | 3881 | memcpy(bpage->data + pos, rpage->data + rpos, size); |
3840 | 3882 | ||
3841 | len -= size; | 3883 | len -= size; |
@@ -3848,8 +3890,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3848 | break; | 3890 | break; |
3849 | 3891 | ||
3850 | event = rb_reader_event(cpu_buffer); | 3892 | event = rb_reader_event(cpu_buffer); |
3851 | size = rb_event_length(event); | 3893 | /* Always keep the time extend and data together */ |
3852 | } while (len > size); | 3894 | size = rb_event_ts_length(event); |
3895 | } while (len >= size); | ||
3853 | 3896 | ||
3854 | /* update bpage */ | 3897 | /* update bpage */ |
3855 | local_set(&bpage->commit, pos); | 3898 | local_set(&bpage->commit, pos); |
@@ -3965,6 +4008,7 @@ static const struct file_operations rb_simple_fops = { | |||
3965 | .open = tracing_open_generic, | 4008 | .open = tracing_open_generic, |
3966 | .read = rb_simple_read, | 4009 | .read = rb_simple_read, |
3967 | .write = rb_simple_write, | 4010 | .write = rb_simple_write, |
4011 | .llseek = default_llseek, | ||
3968 | }; | 4012 | }; |
3969 | 4013 | ||
3970 | 4014 | ||
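On the read side the extend and its data event now always travel together (rb_event_ts_length() keeps them in one unit), and the full delta is rebuilt from the two halves the same way rb_update_write_stamp() does. A hedged reader-side sketch, reusing struct rb_event, TS_SHIFT and skip_time_extend() from the sketch at the top of this page; consume_event() is a hypothetical name, not part of the patch.

/* Hypothetical consumer step: fold the event's delta into the running
 * timestamp; for a time extend, rebuild the full delta from array[0]
 * and time_delta, then step to the data event that follows it. */
static struct rb_event *consume_event(struct rb_event *event, uint64_t *ts)
{
	if (event->type_len == 30 /* RINGBUF_TYPE_TIME_EXTEND, assumed value */) {
		*ts += ((uint64_t)event->array[0] << TS_SHIFT) + event->time_delta;
		event = skip_time_extend(event);	/* data event follows */
	} else {
		*ts += event->time_delta;
	}
	return event;	/* always a data (or padding) event now */
}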