aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--kernel/trace/ring_buffer.c266
1 files changed, 142 insertions, 124 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3af77cd47f2..f50f43107e9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
224 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
225}; 225};
226 226
227#define skip_time_extend(event) \
228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
229
227static inline int rb_null_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
228{ 231{
229 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
248 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
249} 252}
250 253
251/* inline for ring buffer fast paths */ 254/*
252static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
253rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
254{ 261{
255 switch (event->type_len) { 262 switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
274 return 0; 281 return 0;
275} 282}
276 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
277/** 301/**
278 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
279 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
280 */ 310 */
281unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
282{ 312{
283 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
284 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
285 return length; 320 return length;
286 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
294static void * 329static void *
295rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
296{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
297 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
298 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
299 if (event->type_len) 336 if (event->type_len)
@@ -1546,6 +1583,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1546 iter->head = 0; 1583 iter->head = 0;
1547} 1584}
1548 1585
1586/* Slow path, do not inline */
1587static noinline struct ring_buffer_event *
1588rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1589{
1590 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1591
1592 /* Not the first event on the page? */
1593 if (rb_event_index(event)) {
1594 event->time_delta = delta & TS_MASK;
1595 event->array[0] = delta >> TS_SHIFT;
1596 } else {
1597 /* nope, just zero it */
1598 event->time_delta = 0;
1599 event->array[0] = 0;
1600 }
1601
1602 return skip_time_extend(event);
1603}
1604
1549/** 1605/**
1550 * ring_buffer_update_event - update event type and data 1606 * ring_buffer_update_event - update event type and data
1551 * @event: the event to update 1607
@@ -1558,28 +1614,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1558 * data field. 1614 * data field.
1559 */ 1615 */
1560static void 1616static void
1561rb_update_event(struct ring_buffer_event *event, 1617rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1562 unsigned type, unsigned length) 1618 struct ring_buffer_event *event, unsigned length,
1619 int add_timestamp, u64 delta)
1563{ 1620{
1564 event->type_len = type; 1621 /* Only a commit updates the timestamp */
1565 1622 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1566 switch (type) { 1623 delta = 0;
1567
1568 case RINGBUF_TYPE_PADDING:
1569 case RINGBUF_TYPE_TIME_EXTEND:
1570 case RINGBUF_TYPE_TIME_STAMP:
1571 break;
1572 1624
1573 case 0: 1625 /*
1574 length -= RB_EVNT_HDR_SIZE; 1626 * If we need to add a timestamp, then we
1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) 1627 * add it to the start of the resevered space.
1576 event->array[0] = length; 1628 */
1577 else 1629 if (unlikely(add_timestamp)) {
1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1630 event = rb_add_time_stamp(event, delta);
1579 break; 1631 length -= RB_LEN_TIME_EXTEND;
1580 default: 1632 delta = 0;
1581 BUG();
1582 } 1633 }
1634
1635 event->time_delta = delta;
1636 length -= RB_EVNT_HDR_SIZE;
1637 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1638 event->type_len = 0;
1639 event->array[0] = length;
1640 } else
1641 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1583} 1642}
1584 1643
1585/* 1644/*
@@ -1932,12 +1991,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1932 1991
1933static struct ring_buffer_event * 1992static struct ring_buffer_event *
1934__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1993__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1935 unsigned type, unsigned long length, u64 ts) 1994 unsigned long length, u64 ts,
1995 u64 delta, int add_timestamp)
1936{ 1996{
1937 struct buffer_page *tail_page; 1997 struct buffer_page *tail_page;
1938 struct ring_buffer_event *event; 1998 struct ring_buffer_event *event;
1939 unsigned long tail, write; 1999 unsigned long tail, write;
1940 2000
2001 /*
2002 * If the time delta since the last event is too big to
2003 * hold in the time field of the event, then we append a
2004 * TIME EXTEND event ahead of the data event.
2005 */
2006 if (unlikely(add_timestamp))
2007 length += RB_LEN_TIME_EXTEND;
2008
1941 tail_page = cpu_buffer->tail_page; 2009 tail_page = cpu_buffer->tail_page;
1942 write = local_add_return(length, &tail_page->write); 2010 write = local_add_return(length, &tail_page->write);
1943 2011
@@ -1954,11 +2022,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1954 2022
1955 event = __rb_page_index(tail_page, tail); 2023 event = __rb_page_index(tail_page, tail);
1956 kmemcheck_annotate_bitfield(event, bitfield); 2024 kmemcheck_annotate_bitfield(event, bitfield);
1957 rb_update_event(event, type, length); 2025 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1958 2026
1959 /* The passed in type is zero for DATA */ 2027 local_inc(&tail_page->entries);
1960 if (likely(!type))
1961 local_inc(&tail_page->entries);
1962 2028
1963 /* 2029 /*
1964 * If this is the first commit on the page, then update 2030 * If this is the first commit on the page, then update
@@ -1980,7 +2046,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1980 unsigned long addr; 2046 unsigned long addr;
1981 2047
1982 new_index = rb_event_index(event); 2048 new_index = rb_event_index(event);
1983 old_index = new_index + rb_event_length(event); 2049 old_index = new_index + rb_event_ts_length(event);
1984 addr = (unsigned long)event; 2050 addr = (unsigned long)event;
1985 addr &= PAGE_MASK; 2051 addr &= PAGE_MASK;
1986 2052
@@ -2006,67 +2072,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2006 return 0; 2072 return 0;
2007} 2073}
2008 2074
2009static int
2010rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2011 u64 ts, u64 delta)
2012{
2013 struct ring_buffer_event *event;
2014 int ret;
2015
2016 WARN_ONCE(delta > (1ULL << 59),
2017 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2018 (unsigned long long)delta,
2019 (unsigned long long)ts,
2020 (unsigned long long)cpu_buffer->write_stamp);
2021
2022 /*
2023 * The delta is too big, we need to add a
2024 * new timestamp.
2025 */
2026 event = __rb_reserve_next(cpu_buffer,
2027 RINGBUF_TYPE_TIME_EXTEND,
2028 RB_LEN_TIME_EXTEND,
2029 ts);
2030 if (!event)
2031 return -EBUSY;
2032
2033 if (PTR_ERR(event) == -EAGAIN)
2034 return -EAGAIN;
2035
2036 /* Only a committed time event can update the write stamp */
2037 if (rb_event_is_commit(cpu_buffer, event)) {
2038 /*
2039 * If this is the first on the page, then it was
2040 * updated with the page itself. Try to discard it
2041 * and if we can't just make it zero.
2042 */
2043 if (rb_event_index(event)) {
2044 event->time_delta = delta & TS_MASK;
2045 event->array[0] = delta >> TS_SHIFT;
2046 } else {
2047 /* try to discard, since we do not need this */
2048 if (!rb_try_to_discard(cpu_buffer, event)) {
2049 /* nope, just zero it */
2050 event->time_delta = 0;
2051 event->array[0] = 0;
2052 }
2053 }
2054 cpu_buffer->write_stamp = ts;
2055 /* let the caller know this was the commit */
2056 ret = 1;
2057 } else {
2058 /* Try to discard the event */
2059 if (!rb_try_to_discard(cpu_buffer, event)) {
2060 /* Darn, this is just wasted space */
2061 event->time_delta = 0;
2062 event->array[0] = 0;
2063 }
2064 ret = 0;
2065 }
2066
2067 return ret;
2068}
2069
2070static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2075static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2071{ 2076{
2072 local_inc(&cpu_buffer->committing); 2077 local_inc(&cpu_buffer->committing);
@@ -2111,9 +2116,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2111 unsigned long length) 2116 unsigned long length)
2112{ 2117{
2113 struct ring_buffer_event *event; 2118 struct ring_buffer_event *event;
2114 u64 ts, delta = 0; 2119 u64 ts, delta;
2115 int commit = 0;
2116 int nr_loops = 0; 2120 int nr_loops = 0;
2121 int add_timestamp;
2117 2122
2118 rb_start_commit(cpu_buffer); 2123 rb_start_commit(cpu_buffer);
2119 2124
@@ -2134,6 +2139,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2134 2139
2135 length = rb_calculate_event_length(length); 2140 length = rb_calculate_event_length(length);
2136 again: 2141 again:
2142 add_timestamp = 0;
2143 delta = 0;
2144
2137 /* 2145 /*
2138 * We allow for interrupts to reenter here and do a trace. 2146 * We allow for interrupts to reenter here and do a trace.
2139 * If one does, it will cause this original code to loop 2147 * If one does, it will cause this original code to loop
@@ -2172,33 +2180,24 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2172 2180
2173 delta = diff; 2181 delta = diff;
2174 if (unlikely(test_time_stamp(delta))) { 2182 if (unlikely(test_time_stamp(delta))) {
2175 2183 WARN_ONCE(delta > (1ULL << 59),
2176 commit = rb_add_time_stamp(cpu_buffer, ts, delta); 2184 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2177 delta = 0; 2185 (unsigned long long)delta,
2178 2186 (unsigned long long)ts,
2179 if (commit == -EBUSY) 2187 (unsigned long long)cpu_buffer->write_stamp);
2180 goto out_fail; 2188 add_timestamp = 1;
2181
2182 if (commit == -EAGAIN)
2183 goto again;
2184
2185 RB_WARN_ON(cpu_buffer, commit < 0);
2186 } 2189 }
2187 } 2190 }
2188 2191
2189 get_event: 2192 get_event:
2190 event = __rb_reserve_next(cpu_buffer, 0, length, ts); 2193 event = __rb_reserve_next(cpu_buffer, length, ts,
2194 delta, add_timestamp);
2191 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2195 if (unlikely(PTR_ERR(event) == -EAGAIN))
2192 goto again; 2196 goto again;
2193 2197
2194 if (!event) 2198 if (!event)
2195 goto out_fail; 2199 goto out_fail;
2196 2200
2197 if (!rb_event_is_commit(cpu_buffer, event))
2198 delta = 0;
2199
2200 event->time_delta = delta;
2201
2202 return event; 2201 return event;
2203 2202
2204 out_fail: 2203 out_fail:
@@ -2311,12 +2310,28 @@ static void
2311rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2310rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2312 struct ring_buffer_event *event) 2311 struct ring_buffer_event *event)
2313{ 2312{
2313 u64 delta;
2314
2314 /* 2315 /*
2315 * The event first in the commit queue updates the 2316 * The event first in the commit queue updates the
2316 * time stamp. 2317 * time stamp.
2317 */ 2318 */
2318 if (rb_event_is_commit(cpu_buffer, event)) 2319 if (rb_event_is_commit(cpu_buffer, event)) {
2319 cpu_buffer->write_stamp += event->time_delta; 2320 /*
2321 * A commit event that is first on a page
2322 * updates the write timestamp with the page stamp
2323 */
2324 if (!rb_event_index(event))
2325 cpu_buffer->write_stamp =
2326 cpu_buffer->commit_page->page->time_stamp;
2327 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2328 delta = event->array[0];
2329 delta <<= TS_SHIFT;
2330 delta += event->time_delta;
2331 cpu_buffer->write_stamp += delta;
2332 } else
2333 cpu_buffer->write_stamp += event->time_delta;
2334 }
2320} 2335}
2321 2336
2322static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2337static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2356,6 +2371,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2356 2371
2357static inline void rb_event_discard(struct ring_buffer_event *event) 2372static inline void rb_event_discard(struct ring_buffer_event *event)
2358{ 2373{
2374 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2375 event = skip_time_extend(event);
2376
2359 /* array[0] holds the actual length for the discarded event */ 2377 /* array[0] holds the actual length for the discarded event */
2360 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2378 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2361 event->type_len = RINGBUF_TYPE_PADDING; 2379 event->type_len = RINGBUF_TYPE_PADDING;
@@ -3043,12 +3061,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3043 3061
3044 again: 3062 again:
3045 /* 3063 /*
3046 * We repeat when a timestamp is encountered. It is possible 3064 * We repeat when a time extend is encountered.
3047 * to get multiple timestamps from an interrupt entering just 3065 * Since the time extend is always attached to a data event,
3048 * as one timestamp is about to be written, or from discarded 3066 * we should never loop more than once.
3049 * commits. The most that we can have is the number on a single page. 3067 * (We never hit the following condition more than twice).
3050 */ 3068 */
3051 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3069 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3052 return NULL; 3070 return NULL;
3053 3071
3054 reader = rb_get_reader_page(cpu_buffer); 3072 reader = rb_get_reader_page(cpu_buffer);
@@ -3124,14 +3142,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3124 return NULL; 3142 return NULL;
3125 3143
3126 /* 3144 /*
3127 * We repeat when a timestamp is encountered. 3145 * We repeat when a time extend is encountered.
3128 * We can get multiple timestamps by nested interrupts or also 3146 * Since the time extend is always attached to a data event,
3129 * if filtering is on (discarding commits). Since discarding 3147 * we should never loop more than once.
3130 * commits can be frequent we can get a lot of timestamps. 3148 * (We never hit the following condition more than twice).
3131 * But we limit them by not adding timestamps if they begin
3132 * at the start of a page.
3133 */ 3149 */
3134 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3150 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3135 return NULL; 3151 return NULL;
3136 3152
3137 if (rb_per_cpu_empty(cpu_buffer)) 3153 if (rb_per_cpu_empty(cpu_buffer))
@@ -3829,7 +3845,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3829 if (len > (commit - read)) 3845 if (len > (commit - read))
3830 len = (commit - read); 3846 len = (commit - read);
3831 3847
3832 size = rb_event_length(event); 3848 /* Always keep the time extend and data together */
3849 size = rb_event_ts_length(event);
3833 3850
3834 if (len < size) 3851 if (len < size)
3835 goto out_unlock; 3852 goto out_unlock;
@@ -3851,7 +3868,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3851 break; 3868 break;
3852 3869
3853 event = rb_reader_event(cpu_buffer); 3870 event = rb_reader_event(cpu_buffer);
3854 size = rb_event_length(event); 3871 /* Always keep the time extend and data together */
3872 size = rb_event_ts_length(event);
3855 } while (len > size); 3873 } while (len > size);
3856 3874
3857 /* update bpage */ 3875 /* update bpage */