Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--   kernel/trace/ring_buffer.c | 655
1 file changed, 417 insertions(+), 238 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..bd1c35a4fbcc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/slab.h> | ||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/hash.h> | 19 | #include <linux/hash.h> |
19 | #include <linux/list.h> | 20 | #include <linux/list.h> |
20 | #include <linux/cpu.h> | 21 | #include <linux/cpu.h> |
21 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
22 | 23 | ||
24 | #include <asm/local.h> | ||
23 | #include "trace.h" | 25 | #include "trace.h" |
24 | 26 | ||
25 | /* | 27 | /* |
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on); | |||
206 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
207 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | 209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ |
208 | 210 | ||
211 | #if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | ||
212 | # define RB_FORCE_8BYTE_ALIGNMENT 0 | ||
213 | # define RB_ARCH_ALIGNMENT RB_ALIGNMENT | ||
214 | #else | ||
215 | # define RB_FORCE_8BYTE_ALIGNMENT 1 | ||
216 | # define RB_ARCH_ALIGNMENT 8U | ||
217 | #endif | ||
218 | |||
209 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 219 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
210 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 220 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
211 | 221 | ||
@@ -214,6 +224,9 @@ enum { | |||
214 | RB_LEN_TIME_STAMP = 16, | 224 | RB_LEN_TIME_STAMP = 16, |
215 | }; | 225 | }; |
216 | 226 | ||
227 | #define skip_time_extend(event) \ | ||
228 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | ||
229 | |||
217 | static inline int rb_null_event(struct ring_buffer_event *event) | 230 | static inline int rb_null_event(struct ring_buffer_event *event) |
218 | { | 231 | { |
219 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 232 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
@@ -238,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
238 | return length + RB_EVNT_HDR_SIZE; | 251 | return length + RB_EVNT_HDR_SIZE; |
239 | } | 252 | } |
240 | 253 | ||
241 | /* inline for ring buffer fast paths */ | 254 | /* |
242 | static unsigned | 255 | * Return the length of the given event. Will return |
256 | * the length of the time extend if the event is a | ||
257 | * time extend. | ||
258 | */ | ||
259 | static inline unsigned | ||
243 | rb_event_length(struct ring_buffer_event *event) | 260 | rb_event_length(struct ring_buffer_event *event) |
244 | { | 261 | { |
245 | switch (event->type_len) { | 262 | switch (event->type_len) { |
@@ -264,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) | |||
264 | return 0; | 281 | return 0; |
265 | } | 282 | } |
266 | 283 | ||
284 | /* | ||
285 | * Return total length of time extend and data, | ||
286 | * or just the event length for all other events. | ||
287 | */ | ||
288 | static inline unsigned | ||
289 | rb_event_ts_length(struct ring_buffer_event *event) | ||
290 | { | ||
291 | unsigned len = 0; | ||
292 | |||
293 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
294 | /* time extends include the data event after it */ | ||
295 | len = RB_LEN_TIME_EXTEND; | ||
296 | event = skip_time_extend(event); | ||
297 | } | ||
298 | return len + rb_event_length(event); | ||
299 | } | ||
300 | |||
267 | /** | 301 | /** |
268 | * ring_buffer_event_length - return the length of the event | 302 | * ring_buffer_event_length - return the length of the event |
269 | * @event: the event to get the length of | 303 | * @event: the event to get the length of |
304 | * | ||
305 | * Returns the size of the data load of a data event. | ||
306 | * If the event is something other than a data event, it | ||
307 | * returns the size of the event itself. With the exception | ||
308 | * of a TIME EXTEND, where it still returns the size of the | ||
309 | * data load of the data event after it. | ||
270 | */ | 310 | */ |
271 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 311 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
272 | { | 312 | { |
273 | unsigned length = rb_event_length(event); | 313 | unsigned length; |
314 | |||
315 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
316 | event = skip_time_extend(event); | ||
317 | |||
318 | length = rb_event_length(event); | ||
274 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 319 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
275 | return length; | 320 | return length; |
276 | length -= RB_EVNT_HDR_SIZE; | 321 | length -= RB_EVNT_HDR_SIZE; |
@@ -284,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
284 | static void * | 329 | static void * |
285 | rb_event_data(struct ring_buffer_event *event) | 330 | rb_event_data(struct ring_buffer_event *event) |
286 | { | 331 | { |
332 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
333 | event = skip_time_extend(event); | ||
287 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 334 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
288 | /* If length is in len field, then array[0] has the data */ | 335 | /* If length is in len field, then array[0] has the data */ |
289 | if (event->type_len) | 336 | if (event->type_len) |
@@ -309,6 +356,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
309 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | 356 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) |
310 | #define TS_DELTA_TEST (~TS_MASK) | 357 | #define TS_DELTA_TEST (~TS_MASK) |
311 | 358 | ||
359 | /* Flag when events were overwritten */ | ||
360 | #define RB_MISSED_EVENTS (1 << 31) | ||
361 | /* Missed count stored at end */ | ||
362 | #define RB_MISSED_STORED (1 << 30) | ||
363 | |||
312 | struct buffer_data_page { | 364 | struct buffer_data_page { |
313 | u64 time_stamp; /* page time stamp */ | 365 | u64 time_stamp; /* page time stamp */ |
314 | local_t commit; /* write committed index */ | 366 | local_t commit; /* write committed index */ |
@@ -328,6 +380,7 @@ struct buffer_page { | |||
328 | local_t write; /* index for next write */ | 380 | local_t write; /* index for next write */ |
329 | unsigned read; /* index for next read */ | 381 | unsigned read; /* index for next read */ |
330 | local_t entries; /* entries on this page */ | 382 | local_t entries; /* entries on this page */ |
383 | unsigned long real_end; /* real end of data */ | ||
331 | struct buffer_data_page *page; /* Actual data page */ | 384 | struct buffer_data_page *page; /* Actual data page */ |
332 | }; | 385 | }; |
333 | 386 | ||
@@ -388,9 +441,6 @@ static inline int test_time_stamp(u64 delta) | |||
388 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 441 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
389 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 442 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
390 | 443 | ||
391 | /* Max number of timestamps that can fit on a page */ | ||
392 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP) | ||
393 | |||
394 | int ring_buffer_print_page_header(struct trace_seq *s) | 444 | int ring_buffer_print_page_header(struct trace_seq *s) |
395 | { | 445 | { |
396 | struct buffer_data_page field; | 446 | struct buffer_data_page field; |
@@ -407,6 +457,12 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
407 | (unsigned int)sizeof(field.commit), | 457 | (unsigned int)sizeof(field.commit), |
408 | (unsigned int)is_signed_type(long)); | 458 | (unsigned int)is_signed_type(long)); |
409 | 459 | ||
460 | ret = trace_seq_printf(s, "\tfield: int overwrite;\t" | ||
461 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
462 | (unsigned int)offsetof(typeof(field), commit), | ||
463 | 1, | ||
464 | (unsigned int)is_signed_type(long)); | ||
465 | |||
410 | ret = trace_seq_printf(s, "\tfield: char data;\t" | 466 | ret = trace_seq_printf(s, "\tfield: char data;\t" |
411 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | 467 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
412 | (unsigned int)offsetof(typeof(field), data), | 468 | (unsigned int)offsetof(typeof(field), data), |
@@ -421,6 +477,7 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
421 | */ | 477 | */ |
422 | struct ring_buffer_per_cpu { | 478 | struct ring_buffer_per_cpu { |
423 | int cpu; | 479 | int cpu; |
480 | atomic_t record_disabled; | ||
424 | struct ring_buffer *buffer; | 481 | struct ring_buffer *buffer; |
425 | spinlock_t reader_lock; /* serialize readers */ | 482 | spinlock_t reader_lock; /* serialize readers */ |
426 | arch_spinlock_t lock; | 483 | arch_spinlock_t lock; |
@@ -430,6 +487,8 @@ struct ring_buffer_per_cpu { | |||
430 | struct buffer_page *tail_page; /* write to tail */ | 487 | struct buffer_page *tail_page; /* write to tail */ |
431 | struct buffer_page *commit_page; /* committed pages */ | 488 | struct buffer_page *commit_page; /* committed pages */ |
432 | struct buffer_page *reader_page; | 489 | struct buffer_page *reader_page; |
490 | unsigned long lost_events; | ||
491 | unsigned long last_overrun; | ||
433 | local_t commit_overrun; | 492 | local_t commit_overrun; |
434 | local_t overrun; | 493 | local_t overrun; |
435 | local_t entries; | 494 | local_t entries; |
@@ -438,7 +497,6 @@ struct ring_buffer_per_cpu { | |||
438 | unsigned long read; | 497 | unsigned long read; |
439 | u64 write_stamp; | 498 | u64 write_stamp; |
440 | u64 read_stamp; | 499 | u64 read_stamp; |
441 | atomic_t record_disabled; | ||
442 | }; | 500 | }; |
443 | 501 | ||
444 | struct ring_buffer { | 502 | struct ring_buffer { |
@@ -464,6 +522,8 @@ struct ring_buffer_iter { | |||
464 | struct ring_buffer_per_cpu *cpu_buffer; | 522 | struct ring_buffer_per_cpu *cpu_buffer; |
465 | unsigned long head; | 523 | unsigned long head; |
466 | struct buffer_page *head_page; | 524 | struct buffer_page *head_page; |
525 | struct buffer_page *cache_reader_page; | ||
526 | unsigned long cache_read; | ||
467 | u64 read_stamp; | 527 | u64 read_stamp; |
468 | }; | 528 | }; |
469 | 529 | ||
@@ -1198,18 +1258,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | |||
1198 | 1258 | ||
1199 | for (i = 0; i < nr_pages; i++) { | 1259 | for (i = 0; i < nr_pages; i++) { |
1200 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) | 1260 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) |
1201 | return; | 1261 | goto out; |
1202 | p = cpu_buffer->pages->next; | 1262 | p = cpu_buffer->pages->next; |
1203 | bpage = list_entry(p, struct buffer_page, list); | 1263 | bpage = list_entry(p, struct buffer_page, list); |
1204 | list_del_init(&bpage->list); | 1264 | list_del_init(&bpage->list); |
1205 | free_buffer_page(bpage); | 1265 | free_buffer_page(bpage); |
1206 | } | 1266 | } |
1207 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) | 1267 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) |
1208 | return; | 1268 | goto out; |
1209 | 1269 | ||
1210 | rb_reset_cpu(cpu_buffer); | 1270 | rb_reset_cpu(cpu_buffer); |
1211 | rb_check_pages(cpu_buffer); | 1271 | rb_check_pages(cpu_buffer); |
1212 | 1272 | ||
1273 | out: | ||
1213 | spin_unlock_irq(&cpu_buffer->reader_lock); | 1274 | spin_unlock_irq(&cpu_buffer->reader_lock); |
1214 | } | 1275 | } |
1215 | 1276 | ||
@@ -1226,7 +1287,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
1226 | 1287 | ||
1227 | for (i = 0; i < nr_pages; i++) { | 1288 | for (i = 0; i < nr_pages; i++) { |
1228 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) | 1289 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) |
1229 | return; | 1290 | goto out; |
1230 | p = pages->next; | 1291 | p = pages->next; |
1231 | bpage = list_entry(p, struct buffer_page, list); | 1292 | bpage = list_entry(p, struct buffer_page, list); |
1232 | list_del_init(&bpage->list); | 1293 | list_del_init(&bpage->list); |
@@ -1235,6 +1296,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
1235 | rb_reset_cpu(cpu_buffer); | 1296 | rb_reset_cpu(cpu_buffer); |
1236 | rb_check_pages(cpu_buffer); | 1297 | rb_check_pages(cpu_buffer); |
1237 | 1298 | ||
1299 | out: | ||
1238 | spin_unlock_irq(&cpu_buffer->reader_lock); | 1300 | spin_unlock_irq(&cpu_buffer->reader_lock); |
1239 | } | 1301 | } |
1240 | 1302 | ||
@@ -1518,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1518 | iter->head = 0; | 1580 | iter->head = 0; |
1519 | } | 1581 | } |
1520 | 1582 | ||
1583 | /* Slow path, do not inline */ | ||
1584 | static noinline struct ring_buffer_event * | ||
1585 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | ||
1586 | { | ||
1587 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
1588 | |||
1589 | /* Not the first event on the page? */ | ||
1590 | if (rb_event_index(event)) { | ||
1591 | event->time_delta = delta & TS_MASK; | ||
1592 | event->array[0] = delta >> TS_SHIFT; | ||
1593 | } else { | ||
1594 | /* nope, just zero it */ | ||
1595 | event->time_delta = 0; | ||
1596 | event->array[0] = 0; | ||
1597 | } | ||
1598 | |||
1599 | return skip_time_extend(event); | ||
1600 | } | ||
1601 | |||
1521 | /** | 1602 | /** |
1522 | * ring_buffer_update_event - update event type and data | 1603 | * ring_buffer_update_event - update event type and data |
1523 | * @event: the even to update | 1604 | * @event: the even to update |
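Note on the hunk above: rb_add_time_stamp() splits an oversized delta between the event header and array[0] using the TS_SHIFT/TS_MASK constants defined near the top of the file, and rb_update_write_stamp() later reassembles it. The following standalone sketch illustrates that round trip; it assumes TS_SHIFT is 27 (the value used elsewhere in ring_buffer.c) and is not part of the patch.

/*
 * Standalone illustration of the delta encoding used by
 * rb_add_time_stamp() above and decoded in rb_update_write_stamp().
 * TS_SHIFT is assumed to be 27, matching the definition near the
 * top of ring_buffer.c; this is not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define TS_SHIFT 27
#define TS_MASK  ((1ULL << TS_SHIFT) - 1)

struct fake_event {
	uint32_t time_delta;	/* low TS_SHIFT bits, as in event->time_delta */
	uint32_t array0;	/* high bits, as in event->array[0] */
};

static void encode(struct fake_event *e, uint64_t delta)
{
	e->time_delta = delta & TS_MASK;
	e->array0 = delta >> TS_SHIFT;
}

static uint64_t decode(const struct fake_event *e)
{
	uint64_t delta = e->array0;

	delta <<= TS_SHIFT;
	delta += e->time_delta;
	return delta;
}

int main(void)
{
	struct fake_event e;
	uint64_t delta = (1ULL << 40) + 12345;	/* too big for the 27-bit field */

	encode(&e, delta);
	printf("round trip ok: %d\n", decode(&e) == delta);
	return 0;
}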
@@ -1530,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1530 | * data field. | 1611 | * data field. |
1531 | */ | 1612 | */ |
1532 | static void | 1613 | static void |
1533 | rb_update_event(struct ring_buffer_event *event, | 1614 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1534 | unsigned type, unsigned length) | 1615 | struct ring_buffer_event *event, unsigned length, |
1616 | int add_timestamp, u64 delta) | ||
1535 | { | 1617 | { |
1536 | event->type_len = type; | 1618 | /* Only a commit updates the timestamp */ |
1537 | 1619 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | |
1538 | switch (type) { | 1620 | delta = 0; |
1539 | |||
1540 | case RINGBUF_TYPE_PADDING: | ||
1541 | case RINGBUF_TYPE_TIME_EXTEND: | ||
1542 | case RINGBUF_TYPE_TIME_STAMP: | ||
1543 | break; | ||
1544 | 1621 | ||
1545 | case 0: | 1622 | /* |
1546 | length -= RB_EVNT_HDR_SIZE; | 1623 | * If we need to add a timestamp, then we |
1547 | if (length > RB_MAX_SMALL_DATA) | 1624 | * add it to the start of the resevered space. |
1548 | event->array[0] = length; | 1625 | */ |
1549 | else | 1626 | if (unlikely(add_timestamp)) { |
1550 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1627 | event = rb_add_time_stamp(event, delta); |
1551 | break; | 1628 | length -= RB_LEN_TIME_EXTEND; |
1552 | default: | 1629 | delta = 0; |
1553 | BUG(); | ||
1554 | } | 1630 | } |
1631 | |||
1632 | event->time_delta = delta; | ||
1633 | length -= RB_EVNT_HDR_SIZE; | ||
1634 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | ||
1635 | event->type_len = 0; | ||
1636 | event->array[0] = length; | ||
1637 | } else | ||
1638 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1555 | } | 1639 | } |
1556 | 1640 | ||
1557 | /* | 1641 | /* |
@@ -1719,11 +1803,11 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
1719 | if (!length) | 1803 | if (!length) |
1720 | length = 1; | 1804 | length = 1; |
1721 | 1805 | ||
1722 | if (length > RB_MAX_SMALL_DATA) | 1806 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) |
1723 | length += sizeof(event.array[0]); | 1807 | length += sizeof(event.array[0]); |
1724 | 1808 | ||
1725 | length += RB_EVNT_HDR_SIZE; | 1809 | length += RB_EVNT_HDR_SIZE; |
1726 | length = ALIGN(length, RB_ALIGNMENT); | 1810 | length = ALIGN(length, RB_ARCH_ALIGNMENT); |
1727 | 1811 | ||
1728 | return length; | 1812 | return length; |
1729 | } | 1813 | } |
@@ -1740,6 +1824,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1740 | * must fill the old tail_page with padding. | 1824 | * must fill the old tail_page with padding. |
1741 | */ | 1825 | */ |
1742 | if (tail >= BUF_PAGE_SIZE) { | 1826 | if (tail >= BUF_PAGE_SIZE) { |
1827 | /* | ||
1828 | * If the page was filled, then we still need | ||
1829 | * to update the real_end. Reset it to zero | ||
1830 | * and the reader will ignore it. | ||
1831 | */ | ||
1832 | if (tail == BUF_PAGE_SIZE) | ||
1833 | tail_page->real_end = 0; | ||
1834 | |||
1743 | local_sub(length, &tail_page->write); | 1835 | local_sub(length, &tail_page->write); |
1744 | return; | 1836 | return; |
1745 | } | 1837 | } |
@@ -1748,6 +1840,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1748 | kmemcheck_annotate_bitfield(event, bitfield); | 1840 | kmemcheck_annotate_bitfield(event, bitfield); |
1749 | 1841 | ||
1750 | /* | 1842 | /* |
1843 | * Save the original length to the meta data. | ||
1844 | * This will be used by the reader to add lost event | ||
1845 | * counter. | ||
1846 | */ | ||
1847 | tail_page->real_end = tail; | ||
1848 | |||
1849 | /* | ||
1751 | * If this event is bigger than the minimum size, then | 1850 | * If this event is bigger than the minimum size, then |
1752 | * we need to be careful that we don't subtract the | 1851 | * we need to be careful that we don't subtract the |
1753 | * write counter enough to allow another writer to slip | 1852 | * write counter enough to allow another writer to slip |
@@ -1780,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1780 | local_sub(length, &tail_page->write); | 1879 | local_sub(length, &tail_page->write); |
1781 | } | 1880 | } |
1782 | 1881 | ||
1783 | static struct ring_buffer_event * | 1882 | /* |
1883 | * This is the slow path, force gcc not to inline it. | ||
1884 | */ | ||
1885 | static noinline struct ring_buffer_event * | ||
1784 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1886 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
1785 | unsigned long length, unsigned long tail, | 1887 | unsigned long length, unsigned long tail, |
1786 | struct buffer_page *tail_page, u64 *ts) | 1888 | struct buffer_page *tail_page, u64 ts) |
1787 | { | 1889 | { |
1788 | struct buffer_page *commit_page = cpu_buffer->commit_page; | 1890 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
1789 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1891 | struct ring_buffer *buffer = cpu_buffer->buffer; |
@@ -1866,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1866 | * Nested commits always have zero deltas, so | 1968 | * Nested commits always have zero deltas, so |
1867 | * just reread the time stamp | 1969 | * just reread the time stamp |
1868 | */ | 1970 | */ |
1869 | *ts = rb_time_stamp(buffer); | 1971 | ts = rb_time_stamp(buffer); |
1870 | next_page->page->time_stamp = *ts; | 1972 | next_page->page->time_stamp = ts; |
1871 | } | 1973 | } |
1872 | 1974 | ||
1873 | out_again: | 1975 | out_again: |
@@ -1886,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1886 | 1988 | ||
1887 | static struct ring_buffer_event * | 1989 | static struct ring_buffer_event * |
1888 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 1990 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
1889 | unsigned type, unsigned long length, u64 *ts) | 1991 | unsigned long length, u64 ts, |
1992 | u64 delta, int add_timestamp) | ||
1890 | { | 1993 | { |
1891 | struct buffer_page *tail_page; | 1994 | struct buffer_page *tail_page; |
1892 | struct ring_buffer_event *event; | 1995 | struct ring_buffer_event *event; |
1893 | unsigned long tail, write; | 1996 | unsigned long tail, write; |
1894 | 1997 | ||
1998 | /* | ||
1999 | * If the time delta since the last event is too big to | ||
2000 | * hold in the time field of the event, then we append a | ||
2001 | * TIME EXTEND event ahead of the data event. | ||
2002 | */ | ||
2003 | if (unlikely(add_timestamp)) | ||
2004 | length += RB_LEN_TIME_EXTEND; | ||
2005 | |||
1895 | tail_page = cpu_buffer->tail_page; | 2006 | tail_page = cpu_buffer->tail_page; |
1896 | write = local_add_return(length, &tail_page->write); | 2007 | write = local_add_return(length, &tail_page->write); |
1897 | 2008 | ||
@@ -1900,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1900 | tail = write - length; | 2011 | tail = write - length; |
1901 | 2012 | ||
1902 | /* See if we shot pass the end of this buffer page */ | 2013 | /* See if we shot pass the end of this buffer page */ |
1903 | if (write > BUF_PAGE_SIZE) | 2014 | if (unlikely(write > BUF_PAGE_SIZE)) |
1904 | return rb_move_tail(cpu_buffer, length, tail, | 2015 | return rb_move_tail(cpu_buffer, length, tail, |
1905 | tail_page, ts); | 2016 | tail_page, ts); |
1906 | 2017 | ||
@@ -1908,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1908 | 2019 | ||
1909 | event = __rb_page_index(tail_page, tail); | 2020 | event = __rb_page_index(tail_page, tail); |
1910 | kmemcheck_annotate_bitfield(event, bitfield); | 2021 | kmemcheck_annotate_bitfield(event, bitfield); |
1911 | rb_update_event(event, type, length); | 2022 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
1912 | 2023 | ||
1913 | /* The passed in type is zero for DATA */ | 2024 | local_inc(&tail_page->entries); |
1914 | if (likely(!type)) | ||
1915 | local_inc(&tail_page->entries); | ||
1916 | 2025 | ||
1917 | /* | 2026 | /* |
1918 | * If this is the first commit on the page, then update | 2027 | * If this is the first commit on the page, then update |
1919 | * its timestamp. | 2028 | * its timestamp. |
1920 | */ | 2029 | */ |
1921 | if (!tail) | 2030 | if (!tail) |
1922 | tail_page->page->time_stamp = *ts; | 2031 | tail_page->page->time_stamp = ts; |
1923 | 2032 | ||
1924 | return event; | 2033 | return event; |
1925 | } | 2034 | } |
@@ -1934,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1934 | unsigned long addr; | 2043 | unsigned long addr; |
1935 | 2044 | ||
1936 | new_index = rb_event_index(event); | 2045 | new_index = rb_event_index(event); |
1937 | old_index = new_index + rb_event_length(event); | 2046 | old_index = new_index + rb_event_ts_length(event); |
1938 | addr = (unsigned long)event; | 2047 | addr = (unsigned long)event; |
1939 | addr &= PAGE_MASK; | 2048 | addr &= PAGE_MASK; |
1940 | 2049 | ||
@@ -1960,80 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1960 | return 0; | 2069 | return 0; |
1961 | } | 2070 | } |
1962 | 2071 | ||
1963 | static int | ||
1964 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
1965 | u64 *ts, u64 *delta) | ||
1966 | { | ||
1967 | struct ring_buffer_event *event; | ||
1968 | static int once; | ||
1969 | int ret; | ||
1970 | |||
1971 | if (unlikely(*delta > (1ULL << 59) && !once++)) { | ||
1972 | printk(KERN_WARNING "Delta way too big! %llu" | ||
1973 | " ts=%llu write stamp = %llu\n", | ||
1974 | (unsigned long long)*delta, | ||
1975 | (unsigned long long)*ts, | ||
1976 | (unsigned long long)cpu_buffer->write_stamp); | ||
1977 | WARN_ON(1); | ||
1978 | } | ||
1979 | |||
1980 | /* | ||
1981 | * The delta is too big, we to add a | ||
1982 | * new timestamp. | ||
1983 | */ | ||
1984 | event = __rb_reserve_next(cpu_buffer, | ||
1985 | RINGBUF_TYPE_TIME_EXTEND, | ||
1986 | RB_LEN_TIME_EXTEND, | ||
1987 | ts); | ||
1988 | if (!event) | ||
1989 | return -EBUSY; | ||
1990 | |||
1991 | if (PTR_ERR(event) == -EAGAIN) | ||
1992 | return -EAGAIN; | ||
1993 | |||
1994 | /* Only a commited time event can update the write stamp */ | ||
1995 | if (rb_event_is_commit(cpu_buffer, event)) { | ||
1996 | /* | ||
1997 | * If this is the first on the page, then it was | ||
1998 | * updated with the page itself. Try to discard it | ||
1999 | * and if we can't just make it zero. | ||
2000 | */ | ||
2001 | if (rb_event_index(event)) { | ||
2002 | event->time_delta = *delta & TS_MASK; | ||
2003 | event->array[0] = *delta >> TS_SHIFT; | ||
2004 | } else { | ||
2005 | /* try to discard, since we do not need this */ | ||
2006 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2007 | /* nope, just zero it */ | ||
2008 | event->time_delta = 0; | ||
2009 | event->array[0] = 0; | ||
2010 | } | ||
2011 | } | ||
2012 | cpu_buffer->write_stamp = *ts; | ||
2013 | /* let the caller know this was the commit */ | ||
2014 | ret = 1; | ||
2015 | } else { | ||
2016 | /* Try to discard the event */ | ||
2017 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2018 | /* Darn, this is just wasted space */ | ||
2019 | event->time_delta = 0; | ||
2020 | event->array[0] = 0; | ||
2021 | } | ||
2022 | ret = 0; | ||
2023 | } | ||
2024 | |||
2025 | *delta = 0; | ||
2026 | |||
2027 | return ret; | ||
2028 | } | ||
2029 | |||
2030 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2072 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2031 | { | 2073 | { |
2032 | local_inc(&cpu_buffer->committing); | 2074 | local_inc(&cpu_buffer->committing); |
2033 | local_inc(&cpu_buffer->commits); | 2075 | local_inc(&cpu_buffer->commits); |
2034 | } | 2076 | } |
2035 | 2077 | ||
2036 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2078 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2037 | { | 2079 | { |
2038 | unsigned long commits; | 2080 | unsigned long commits; |
2039 | 2081 | ||
@@ -2071,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2071 | unsigned long length) | 2113 | unsigned long length) |
2072 | { | 2114 | { |
2073 | struct ring_buffer_event *event; | 2115 | struct ring_buffer_event *event; |
2074 | u64 ts, delta = 0; | 2116 | u64 ts, delta; |
2075 | int commit = 0; | ||
2076 | int nr_loops = 0; | 2117 | int nr_loops = 0; |
2118 | int add_timestamp; | ||
2119 | u64 diff; | ||
2077 | 2120 | ||
2078 | rb_start_commit(cpu_buffer); | 2121 | rb_start_commit(cpu_buffer); |
2079 | 2122 | ||
@@ -2094,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2094 | 2137 | ||
2095 | length = rb_calculate_event_length(length); | 2138 | length = rb_calculate_event_length(length); |
2096 | again: | 2139 | again: |
2140 | add_timestamp = 0; | ||
2141 | delta = 0; | ||
2142 | |||
2097 | /* | 2143 | /* |
2098 | * We allow for interrupts to reenter here and do a trace. | 2144 | * We allow for interrupts to reenter here and do a trace. |
2099 | * If one does, it will cause this original code to loop | 2145 | * If one does, it will cause this original code to loop |
@@ -2107,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2107 | goto out_fail; | 2153 | goto out_fail; |
2108 | 2154 | ||
2109 | ts = rb_time_stamp(cpu_buffer->buffer); | 2155 | ts = rb_time_stamp(cpu_buffer->buffer); |
2156 | diff = ts - cpu_buffer->write_stamp; | ||
2110 | 2157 | ||
2111 | /* | 2158 | /* make sure this diff is calculated here */ |
2112 | * Only the first commit can update the timestamp. | 2159 | barrier(); |
2113 | * Yes there is a race here. If an interrupt comes in | ||
2114 | * just after the conditional and it traces too, then it | ||
2115 | * will also check the deltas. More than one timestamp may | ||
2116 | * also be made. But only the entry that did the actual | ||
2117 | * commit will be something other than zero. | ||
2118 | */ | ||
2119 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && | ||
2120 | rb_page_write(cpu_buffer->tail_page) == | ||
2121 | rb_commit_index(cpu_buffer))) { | ||
2122 | u64 diff; | ||
2123 | |||
2124 | diff = ts - cpu_buffer->write_stamp; | ||
2125 | |||
2126 | /* make sure this diff is calculated here */ | ||
2127 | barrier(); | ||
2128 | |||
2129 | /* Did the write stamp get updated already? */ | ||
2130 | if (unlikely(ts < cpu_buffer->write_stamp)) | ||
2131 | goto get_event; | ||
2132 | 2160 | ||
2161 | /* Did the write stamp get updated already? */ | ||
2162 | if (likely(ts >= cpu_buffer->write_stamp)) { | ||
2133 | delta = diff; | 2163 | delta = diff; |
2134 | if (unlikely(test_time_stamp(delta))) { | 2164 | if (unlikely(test_time_stamp(delta))) { |
2135 | 2165 | WARN_ONCE(delta > (1ULL << 59), | |
2136 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2166 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", |
2137 | if (commit == -EBUSY) | 2167 | (unsigned long long)delta, |
2138 | goto out_fail; | 2168 | (unsigned long long)ts, |
2139 | 2169 | (unsigned long long)cpu_buffer->write_stamp); | |
2140 | if (commit == -EAGAIN) | 2170 | add_timestamp = 1; |
2141 | goto again; | ||
2142 | |||
2143 | RB_WARN_ON(cpu_buffer, commit < 0); | ||
2144 | } | 2171 | } |
2145 | } | 2172 | } |
2146 | 2173 | ||
2147 | get_event: | 2174 | event = __rb_reserve_next(cpu_buffer, length, ts, |
2148 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); | 2175 | delta, add_timestamp); |
2149 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2176 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
2150 | goto again; | 2177 | goto again; |
2151 | 2178 | ||
2152 | if (!event) | 2179 | if (!event) |
2153 | goto out_fail; | 2180 | goto out_fail; |
2154 | 2181 | ||
2155 | if (!rb_event_is_commit(cpu_buffer, event)) | ||
2156 | delta = 0; | ||
2157 | |||
2158 | event->time_delta = delta; | ||
2159 | |||
2160 | return event; | 2182 | return event; |
2161 | 2183 | ||
2162 | out_fail: | 2184 | out_fail: |
@@ -2168,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2168 | 2190 | ||
2169 | #define TRACE_RECURSIVE_DEPTH 16 | 2191 | #define TRACE_RECURSIVE_DEPTH 16 |
2170 | 2192 | ||
2171 | static int trace_recursive_lock(void) | 2193 | /* Keep this code out of the fast path cache */ |
2194 | static noinline void trace_recursive_fail(void) | ||
2172 | { | 2195 | { |
2173 | current->trace_recursion++; | ||
2174 | |||
2175 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2176 | return 0; | ||
2177 | |||
2178 | /* Disable all tracing before we do anything else */ | 2196 | /* Disable all tracing before we do anything else */ |
2179 | tracing_off_permanent(); | 2197 | tracing_off_permanent(); |
2180 | 2198 | ||
@@ -2186,10 +2204,21 @@ static int trace_recursive_lock(void) | |||
2186 | in_nmi()); | 2204 | in_nmi()); |
2187 | 2205 | ||
2188 | WARN_ON_ONCE(1); | 2206 | WARN_ON_ONCE(1); |
2207 | } | ||
2208 | |||
2209 | static inline int trace_recursive_lock(void) | ||
2210 | { | ||
2211 | current->trace_recursion++; | ||
2212 | |||
2213 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2214 | return 0; | ||
2215 | |||
2216 | trace_recursive_fail(); | ||
2217 | |||
2189 | return -1; | 2218 | return -1; |
2190 | } | 2219 | } |
2191 | 2220 | ||
2192 | static void trace_recursive_unlock(void) | 2221 | static inline void trace_recursive_unlock(void) |
2193 | { | 2222 | { |
2194 | WARN_ON_ONCE(!current->trace_recursion); | 2223 | WARN_ON_ONCE(!current->trace_recursion); |
2195 | 2224 | ||
@@ -2203,8 +2232,6 @@ static void trace_recursive_unlock(void) | |||
2203 | 2232 | ||
2204 | #endif | 2233 | #endif |
2205 | 2234 | ||
2206 | static DEFINE_PER_CPU(int, rb_need_resched); | ||
2207 | |||
2208 | /** | 2235 | /** |
2209 | * ring_buffer_lock_reserve - reserve a part of the buffer | 2236 | * ring_buffer_lock_reserve - reserve a part of the buffer |
2210 | * @buffer: the ring buffer to reserve from | 2237 | * @buffer: the ring buffer to reserve from |
@@ -2225,16 +2252,16 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
2225 | { | 2252 | { |
2226 | struct ring_buffer_per_cpu *cpu_buffer; | 2253 | struct ring_buffer_per_cpu *cpu_buffer; |
2227 | struct ring_buffer_event *event; | 2254 | struct ring_buffer_event *event; |
2228 | int cpu, resched; | 2255 | int cpu; |
2229 | 2256 | ||
2230 | if (ring_buffer_flags != RB_BUFFERS_ON) | 2257 | if (ring_buffer_flags != RB_BUFFERS_ON) |
2231 | return NULL; | 2258 | return NULL; |
2232 | 2259 | ||
2233 | if (atomic_read(&buffer->record_disabled)) | ||
2234 | return NULL; | ||
2235 | |||
2236 | /* If we are tracing schedule, we don't want to recurse */ | 2260 | /* If we are tracing schedule, we don't want to recurse */ |
2237 | resched = ftrace_preempt_disable(); | 2261 | preempt_disable_notrace(); |
2262 | |||
2263 | if (atomic_read(&buffer->record_disabled)) | ||
2264 | goto out_nocheck; | ||
2238 | 2265 | ||
2239 | if (trace_recursive_lock()) | 2266 | if (trace_recursive_lock()) |
2240 | goto out_nocheck; | 2267 | goto out_nocheck; |
@@ -2256,21 +2283,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
2256 | if (!event) | 2283 | if (!event) |
2257 | goto out; | 2284 | goto out; |
2258 | 2285 | ||
2259 | /* | ||
2260 | * Need to store resched state on this cpu. | ||
2261 | * Only the first needs to. | ||
2262 | */ | ||
2263 | |||
2264 | if (preempt_count() == 1) | ||
2265 | per_cpu(rb_need_resched, cpu) = resched; | ||
2266 | |||
2267 | return event; | 2286 | return event; |
2268 | 2287 | ||
2269 | out: | 2288 | out: |
2270 | trace_recursive_unlock(); | 2289 | trace_recursive_unlock(); |
2271 | 2290 | ||
2272 | out_nocheck: | 2291 | out_nocheck: |
2273 | ftrace_preempt_enable(resched); | 2292 | preempt_enable_notrace(); |
2274 | return NULL; | 2293 | return NULL; |
2275 | } | 2294 | } |
2276 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | 2295 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); |
@@ -2279,12 +2298,28 @@ static void | |||
2279 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 2298 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
2280 | struct ring_buffer_event *event) | 2299 | struct ring_buffer_event *event) |
2281 | { | 2300 | { |
2301 | u64 delta; | ||
2302 | |||
2282 | /* | 2303 | /* |
2283 | * The event first in the commit queue updates the | 2304 | * The event first in the commit queue updates the |
2284 | * time stamp. | 2305 | * time stamp. |
2285 | */ | 2306 | */ |
2286 | if (rb_event_is_commit(cpu_buffer, event)) | 2307 | if (rb_event_is_commit(cpu_buffer, event)) { |
2287 | cpu_buffer->write_stamp += event->time_delta; | 2308 | /* |
2309 | * A commit event that is first on a page | ||
2310 | * updates the write timestamp with the page stamp | ||
2311 | */ | ||
2312 | if (!rb_event_index(event)) | ||
2313 | cpu_buffer->write_stamp = | ||
2314 | cpu_buffer->commit_page->page->time_stamp; | ||
2315 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
2316 | delta = event->array[0]; | ||
2317 | delta <<= TS_SHIFT; | ||
2318 | delta += event->time_delta; | ||
2319 | cpu_buffer->write_stamp += delta; | ||
2320 | } else | ||
2321 | cpu_buffer->write_stamp += event->time_delta; | ||
2322 | } | ||
2288 | } | 2323 | } |
2289 | 2324 | ||
2290 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2325 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -2316,13 +2351,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, | |||
2316 | 2351 | ||
2317 | trace_recursive_unlock(); | 2352 | trace_recursive_unlock(); |
2318 | 2353 | ||
2319 | /* | 2354 | preempt_enable_notrace(); |
2320 | * Only the last preempt count needs to restore preemption. | ||
2321 | */ | ||
2322 | if (preempt_count() == 1) | ||
2323 | ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); | ||
2324 | else | ||
2325 | preempt_enable_no_resched_notrace(); | ||
2326 | 2355 | ||
2327 | return 0; | 2356 | return 0; |
2328 | } | 2357 | } |
@@ -2330,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | |||
2330 | 2359 | ||
2331 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2360 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2332 | { | 2361 | { |
2362 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
2363 | event = skip_time_extend(event); | ||
2364 | |||
2333 | /* array[0] holds the actual length for the discarded event */ | 2365 | /* array[0] holds the actual length for the discarded event */ |
2334 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2366 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
2335 | event->type_len = RINGBUF_TYPE_PADDING; | 2367 | event->type_len = RINGBUF_TYPE_PADDING; |
@@ -2430,13 +2462,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
2430 | 2462 | ||
2431 | trace_recursive_unlock(); | 2463 | trace_recursive_unlock(); |
2432 | 2464 | ||
2433 | /* | 2465 | preempt_enable_notrace(); |
2434 | * Only the last preempt count needs to restore preemption. | ||
2435 | */ | ||
2436 | if (preempt_count() == 1) | ||
2437 | ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); | ||
2438 | else | ||
2439 | preempt_enable_no_resched_notrace(); | ||
2440 | 2466 | ||
2441 | } | 2467 | } |
2442 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); | 2468 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); |
@@ -2462,15 +2488,15 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
2462 | struct ring_buffer_event *event; | 2488 | struct ring_buffer_event *event; |
2463 | void *body; | 2489 | void *body; |
2464 | int ret = -EBUSY; | 2490 | int ret = -EBUSY; |
2465 | int cpu, resched; | 2491 | int cpu; |
2466 | 2492 | ||
2467 | if (ring_buffer_flags != RB_BUFFERS_ON) | 2493 | if (ring_buffer_flags != RB_BUFFERS_ON) |
2468 | return -EBUSY; | 2494 | return -EBUSY; |
2469 | 2495 | ||
2470 | if (atomic_read(&buffer->record_disabled)) | 2496 | preempt_disable_notrace(); |
2471 | return -EBUSY; | ||
2472 | 2497 | ||
2473 | resched = ftrace_preempt_disable(); | 2498 | if (atomic_read(&buffer->record_disabled)) |
2499 | goto out; | ||
2474 | 2500 | ||
2475 | cpu = raw_smp_processor_id(); | 2501 | cpu = raw_smp_processor_id(); |
2476 | 2502 | ||
@@ -2497,7 +2523,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
2497 | 2523 | ||
2498 | ret = 0; | 2524 | ret = 0; |
2499 | out: | 2525 | out: |
2500 | ftrace_preempt_enable(resched); | 2526 | preempt_enable_notrace(); |
2501 | 2527 | ||
2502 | return ret; | 2528 | return ret; |
2503 | } | 2529 | } |
@@ -2539,7 +2565,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable); | |||
2539 | * @buffer: The ring buffer to enable writes | 2565 | * @buffer: The ring buffer to enable writes |
2540 | * | 2566 | * |
2541 | * Note, multiple disables will need the same number of enables | 2567 | * Note, multiple disables will need the same number of enables |
2542 | * to truely enable the writing (much like preempt_disable). | 2568 | * to truly enable the writing (much like preempt_disable). |
2543 | */ | 2569 | */ |
2544 | void ring_buffer_record_enable(struct ring_buffer *buffer) | 2570 | void ring_buffer_record_enable(struct ring_buffer *buffer) |
2545 | { | 2571 | { |
@@ -2575,7 +2601,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); | |||
2575 | * @cpu: The CPU to enable. | 2601 | * @cpu: The CPU to enable. |
2576 | * | 2602 | * |
2577 | * Note, multiple disables will need the same number of enables | 2603 | * Note, multiple disables will need the same number of enables |
2578 | * to truely enable the writing (much like preempt_disable). | 2604 | * to truly enable the writing (much like preempt_disable). |
2579 | */ | 2605 | */ |
2580 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | 2606 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) |
2581 | { | 2607 | { |
@@ -2589,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | |||
2589 | } | 2615 | } |
2590 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | 2616 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); |
2591 | 2617 | ||
2618 | /* | ||
2619 | * The total entries in the ring buffer is the running counter | ||
2620 | * of entries entered into the ring buffer, minus the sum of | ||
2621 | * the entries read from the ring buffer and the number of | ||
2622 | * entries that were overwritten. | ||
2623 | */ | ||
2624 | static inline unsigned long | ||
2625 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | ||
2626 | { | ||
2627 | return local_read(&cpu_buffer->entries) - | ||
2628 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); | ||
2629 | } | ||
2630 | |||
2592 | /** | 2631 | /** |
2593 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2632 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
2594 | * @buffer: The ring buffer | 2633 | * @buffer: The ring buffer |
@@ -2597,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
2597 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 2636 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
2598 | { | 2637 | { |
2599 | struct ring_buffer_per_cpu *cpu_buffer; | 2638 | struct ring_buffer_per_cpu *cpu_buffer; |
2600 | unsigned long ret; | ||
2601 | 2639 | ||
2602 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2640 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2603 | return 0; | 2641 | return 0; |
2604 | 2642 | ||
2605 | cpu_buffer = buffer->buffers[cpu]; | 2643 | cpu_buffer = buffer->buffers[cpu]; |
2606 | ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) | ||
2607 | - cpu_buffer->read; | ||
2608 | 2644 | ||
2609 | return ret; | 2645 | return rb_num_of_entries(cpu_buffer); |
2610 | } | 2646 | } |
2611 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 2647 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
2612 | 2648 | ||
@@ -2667,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2667 | /* if you care about this being correct, lock the buffer */ | 2703 | /* if you care about this being correct, lock the buffer */ |
2668 | for_each_buffer_cpu(buffer, cpu) { | 2704 | for_each_buffer_cpu(buffer, cpu) { |
2669 | cpu_buffer = buffer->buffers[cpu]; | 2705 | cpu_buffer = buffer->buffers[cpu]; |
2670 | entries += (local_read(&cpu_buffer->entries) - | 2706 | entries += rb_num_of_entries(cpu_buffer); |
2671 | local_read(&cpu_buffer->overrun)) - cpu_buffer->read; | ||
2672 | } | 2707 | } |
2673 | 2708 | ||
2674 | return entries; | 2709 | return entries; |
@@ -2716,6 +2751,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
2716 | iter->read_stamp = cpu_buffer->read_stamp; | 2751 | iter->read_stamp = cpu_buffer->read_stamp; |
2717 | else | 2752 | else |
2718 | iter->read_stamp = iter->head_page->page->time_stamp; | 2753 | iter->read_stamp = iter->head_page->page->time_stamp; |
2754 | iter->cache_reader_page = cpu_buffer->reader_page; | ||
2755 | iter->cache_read = cpu_buffer->read; | ||
2719 | } | 2756 | } |
2720 | 2757 | ||
2721 | /** | 2758 | /** |
@@ -2822,6 +2859,7 @@ static struct buffer_page * | |||
2822 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 2859 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
2823 | { | 2860 | { |
2824 | struct buffer_page *reader = NULL; | 2861 | struct buffer_page *reader = NULL; |
2862 | unsigned long overwrite; | ||
2825 | unsigned long flags; | 2863 | unsigned long flags; |
2826 | int nr_loops = 0; | 2864 | int nr_loops = 0; |
2827 | int ret; | 2865 | int ret; |
@@ -2863,6 +2901,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2863 | local_set(&cpu_buffer->reader_page->write, 0); | 2901 | local_set(&cpu_buffer->reader_page->write, 0); |
2864 | local_set(&cpu_buffer->reader_page->entries, 0); | 2902 | local_set(&cpu_buffer->reader_page->entries, 0); |
2865 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2903 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
2904 | cpu_buffer->reader_page->real_end = 0; | ||
2866 | 2905 | ||
2867 | spin: | 2906 | spin: |
2868 | /* | 2907 | /* |
@@ -2883,6 +2922,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2883 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); | 2922 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); |
2884 | 2923 | ||
2885 | /* | 2924 | /* |
2925 | * We want to make sure we read the overruns after we set up our | ||
2926 | * pointers to the next object. The writer side does a | ||
2927 | * cmpxchg to cross pages which acts as the mb on the writer | ||
2928 | * side. Note, the reader will constantly fail the swap | ||
2929 | * while the writer is updating the pointers, so this | ||
2930 | * guarantees that the overwrite recorded here is the one we | ||
2931 | * want to compare with the last_overrun. | ||
2932 | */ | ||
2933 | smp_mb(); | ||
2934 | overwrite = local_read(&(cpu_buffer->overrun)); | ||
2935 | |||
2936 | /* | ||
2886 | * Here's the tricky part. | 2937 | * Here's the tricky part. |
2887 | * | 2938 | * |
2888 | * We need to move the pointer past the header page. | 2939 | * We need to move the pointer past the header page. |
@@ -2913,6 +2964,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2913 | cpu_buffer->reader_page = reader; | 2964 | cpu_buffer->reader_page = reader; |
2914 | rb_reset_reader_page(cpu_buffer); | 2965 | rb_reset_reader_page(cpu_buffer); |
2915 | 2966 | ||
2967 | if (overwrite != cpu_buffer->last_overrun) { | ||
2968 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; | ||
2969 | cpu_buffer->last_overrun = overwrite; | ||
2970 | } | ||
2971 | |||
2916 | goto again; | 2972 | goto again; |
2917 | 2973 | ||
2918 | out: | 2974 | out: |
@@ -2947,13 +3003,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
2947 | 3003 | ||
2948 | static void rb_advance_iter(struct ring_buffer_iter *iter) | 3004 | static void rb_advance_iter(struct ring_buffer_iter *iter) |
2949 | { | 3005 | { |
2950 | struct ring_buffer *buffer; | ||
2951 | struct ring_buffer_per_cpu *cpu_buffer; | 3006 | struct ring_buffer_per_cpu *cpu_buffer; |
2952 | struct ring_buffer_event *event; | 3007 | struct ring_buffer_event *event; |
2953 | unsigned length; | 3008 | unsigned length; |
2954 | 3009 | ||
2955 | cpu_buffer = iter->cpu_buffer; | 3010 | cpu_buffer = iter->cpu_buffer; |
2956 | buffer = cpu_buffer->buffer; | ||
2957 | 3011 | ||
2958 | /* | 3012 | /* |
2959 | * Check if we are at the end of the buffer. | 3013 | * Check if we are at the end of the buffer. |
@@ -2989,8 +3043,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
2989 | rb_advance_iter(iter); | 3043 | rb_advance_iter(iter); |
2990 | } | 3044 | } |
2991 | 3045 | ||
3046 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) | ||
3047 | { | ||
3048 | return cpu_buffer->lost_events; | ||
3049 | } | ||
3050 | |||
2992 | static struct ring_buffer_event * | 3051 | static struct ring_buffer_event * |
2993 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) | 3052 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, |
3053 | unsigned long *lost_events) | ||
2994 | { | 3054 | { |
2995 | struct ring_buffer_event *event; | 3055 | struct ring_buffer_event *event; |
2996 | struct buffer_page *reader; | 3056 | struct buffer_page *reader; |
@@ -2998,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) | |||
2998 | 3058 | ||
2999 | again: | 3059 | again: |
3000 | /* | 3060 | /* |
3001 | * We repeat when a timestamp is encountered. It is possible | 3061 | * We repeat when a time extend is encountered. |
3002 | * to get multiple timestamps from an interrupt entering just | 3062 | * Since the time extend is always attached to a data event, |
3003 | * as one timestamp is about to be written, or from discarded | 3063 | * we should never loop more than once. |
3004 | * commits. The most that we can have is the number on a single page. | 3064 | * (We never hit the following condition more than twice). |
3005 | */ | 3065 | */ |
3006 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3066 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3007 | return NULL; | 3067 | return NULL; |
3008 | 3068 | ||
3009 | reader = rb_get_reader_page(cpu_buffer); | 3069 | reader = rb_get_reader_page(cpu_buffer); |
@@ -3042,6 +3102,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) | |||
3042 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | 3102 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, |
3043 | cpu_buffer->cpu, ts); | 3103 | cpu_buffer->cpu, ts); |
3044 | } | 3104 | } |
3105 | if (lost_events) | ||
3106 | *lost_events = rb_lost_events(cpu_buffer); | ||
3045 | return event; | 3107 | return event; |
3046 | 3108 | ||
3047 | default: | 3109 | default: |
@@ -3060,27 +3122,39 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3060 | struct ring_buffer_event *event; | 3122 | struct ring_buffer_event *event; |
3061 | int nr_loops = 0; | 3123 | int nr_loops = 0; |
3062 | 3124 | ||
3063 | if (ring_buffer_iter_empty(iter)) | ||
3064 | return NULL; | ||
3065 | |||
3066 | cpu_buffer = iter->cpu_buffer; | 3125 | cpu_buffer = iter->cpu_buffer; |
3067 | buffer = cpu_buffer->buffer; | 3126 | buffer = cpu_buffer->buffer; |
3068 | 3127 | ||
3128 | /* | ||
3129 | * Check if someone performed a consuming read to | ||
3130 | * the buffer. A consuming read invalidates the iterator | ||
3131 | * and we need to reset the iterator in this case. | ||
3132 | */ | ||
3133 | if (unlikely(iter->cache_read != cpu_buffer->read || | ||
3134 | iter->cache_reader_page != cpu_buffer->reader_page)) | ||
3135 | rb_iter_reset(iter); | ||
3136 | |||
3069 | again: | 3137 | again: |
3138 | if (ring_buffer_iter_empty(iter)) | ||
3139 | return NULL; | ||
3140 | |||
3070 | /* | 3141 | /* |
3071 | * We repeat when a timestamp is encountered. | 3142 | * We repeat when a time extend is encountered. |
3072 | * We can get multiple timestamps by nested interrupts or also | 3143 | * Since the time extend is always attached to a data event, |
3073 | * if filtering is on (discarding commits). Since discarding | 3144 | * we should never loop more than once. |
3074 | * commits can be frequent we can get a lot of timestamps. | 3145 | * (We never hit the following condition more than twice). |
3075 | * But we limit them by not adding timestamps if they begin | ||
3076 | * at the start of a page. | ||
3077 | */ | 3146 | */ |
3078 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3147 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3079 | return NULL; | 3148 | return NULL; |
3080 | 3149 | ||
3081 | if (rb_per_cpu_empty(cpu_buffer)) | 3150 | if (rb_per_cpu_empty(cpu_buffer)) |
3082 | return NULL; | 3151 | return NULL; |
3083 | 3152 | ||
3153 | if (iter->head >= local_read(&iter->head_page->page->commit)) { | ||
3154 | rb_inc_iter(iter); | ||
3155 | goto again; | ||
3156 | } | ||
3157 | |||
3084 | event = rb_iter_head_event(iter); | 3158 | event = rb_iter_head_event(iter); |
3085 | 3159 | ||
3086 | switch (event->type_len) { | 3160 | switch (event->type_len) { |
@@ -3138,12 +3212,14 @@ static inline int rb_ok_to_lock(void) | |||
3138 | * @buffer: The ring buffer to read | 3212 | * @buffer: The ring buffer to read |
3139 | * @cpu: The cpu to peak at | 3213 | * @cpu: The cpu to peak at |
3140 | * @ts: The timestamp counter of this event. | 3214 | * @ts: The timestamp counter of this event. |
3215 | * @lost_events: a variable to store if events were lost (may be NULL) | ||
3141 | * | 3216 | * |
3142 | * This will return the event that will be read next, but does | 3217 | * This will return the event that will be read next, but does |
3143 | * not consume the data. | 3218 | * not consume the data. |
3144 | */ | 3219 | */ |
3145 | struct ring_buffer_event * | 3220 | struct ring_buffer_event * |
3146 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 3221 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, |
3222 | unsigned long *lost_events) | ||
3147 | { | 3223 | { |
3148 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 3224 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
3149 | struct ring_buffer_event *event; | 3225 | struct ring_buffer_event *event; |
@@ -3158,7 +3234,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3158 | local_irq_save(flags); | 3234 | local_irq_save(flags); |
3159 | if (dolock) | 3235 | if (dolock) |
3160 | spin_lock(&cpu_buffer->reader_lock); | 3236 | spin_lock(&cpu_buffer->reader_lock); |
3161 | event = rb_buffer_peek(cpu_buffer, ts); | 3237 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3162 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3238 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3163 | rb_advance_reader(cpu_buffer); | 3239 | rb_advance_reader(cpu_buffer); |
3164 | if (dolock) | 3240 | if (dolock) |
@@ -3200,13 +3276,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3200 | /** | 3276 | /** |
3201 | * ring_buffer_consume - return an event and consume it | 3277 | * ring_buffer_consume - return an event and consume it |
3202 | * @buffer: The ring buffer to get the next event from | 3278 | * @buffer: The ring buffer to get the next event from |
3279 | * @cpu: the cpu to read the buffer from | ||
3280 | * @ts: a variable to store the timestamp (may be NULL) | ||
3281 | * @lost_events: a variable to store if events were lost (may be NULL) | ||
3203 | * | 3282 | * |
3204 | * Returns the next event in the ring buffer, and that event is consumed. | 3283 | * Returns the next event in the ring buffer, and that event is consumed. |
3205 | * Meaning, that sequential reads will keep returning a different event, | 3284 | * Meaning, that sequential reads will keep returning a different event, |
3206 | * and eventually empty the ring buffer if the producer is slower. | 3285 | * and eventually empty the ring buffer if the producer is slower. |
3207 | */ | 3286 | */ |
3208 | struct ring_buffer_event * | 3287 | struct ring_buffer_event * |
3209 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | 3288 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, |
3289 | unsigned long *lost_events) | ||
3210 | { | 3290 | { |
3211 | struct ring_buffer_per_cpu *cpu_buffer; | 3291 | struct ring_buffer_per_cpu *cpu_buffer; |
3212 | struct ring_buffer_event *event = NULL; | 3292 | struct ring_buffer_event *event = NULL; |
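Note on the hunk above: ring_buffer_peek() and ring_buffer_consume() now take an optional @lost_events pointer so a reader can see how many events were overwritten before the one returned. A sketch of a consuming reader using the new argument; the loop structure and pr_info() reporting are illustrative only and not part of the patch.

/*
 * Sketch of a consuming reader using the new @lost_events argument.
 * Illustrative only: the loop structure and pr_info() calls are not
 * part of the patch.
 */
#include <linux/ring_buffer.h>
#include <linux/kernel.h>

static void drain_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost_events))) {
		if (lost_events)
			pr_info("cpu %d: %lu events lost before this one\n",
				cpu, lost_events);

		pr_info("cpu %d: ts=%llu len=%u\n", cpu,
			(unsigned long long)ts,
			ring_buffer_event_length(event));
	}
}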
@@ -3227,9 +3307,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3227 | if (dolock) | 3307 | if (dolock) |
3228 | spin_lock(&cpu_buffer->reader_lock); | 3308 | spin_lock(&cpu_buffer->reader_lock); |
3229 | 3309 | ||
3230 | event = rb_buffer_peek(cpu_buffer, ts); | 3310 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3231 | if (event) | 3311 | if (event) { |
3312 | cpu_buffer->lost_events = 0; | ||
3232 | rb_advance_reader(cpu_buffer); | 3313 | rb_advance_reader(cpu_buffer); |
3314 | } | ||
3233 | 3315 | ||
3234 | if (dolock) | 3316 | if (dolock) |
3235 | spin_unlock(&cpu_buffer->reader_lock); | 3317 | spin_unlock(&cpu_buffer->reader_lock); |
@@ -3246,23 +3328,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3246 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 3328 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
3247 | 3329 | ||
3248 | /** | 3330 | /** |
3249 | * ring_buffer_read_start - start a non consuming read of the buffer | 3331 | * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer |
3250 | * @buffer: The ring buffer to read from | 3332 | * @buffer: The ring buffer to read from |
3251 | * @cpu: The cpu buffer to iterate over | 3333 | * @cpu: The cpu buffer to iterate over |
3252 | * | 3334 | * |
3253 | * This starts up an iteration through the buffer. It also disables | 3335 | * This performs the initial preparations necessary to iterate |
3254 | * the recording to the buffer until the reading is finished. | 3336 | * through the buffer. Memory is allocated, buffer recording |
3255 | * This prevents the reading from being corrupted. This is not | 3337 | * is disabled, and the iterator pointer is returned to the caller. |
3256 | * a consuming read, so a producer is not expected. | ||
3257 | * | 3338 | * |
3258 | * Must be paired with ring_buffer_finish. | 3339 | * Disabling buffer recordng prevents the reading from being |
3340 | * corrupted. This is not a consuming read, so a producer is not | ||
3341 | * expected. | ||
3342 | * | ||
3343 | * After a sequence of ring_buffer_read_prepare calls, the user is | ||
3344 | * expected to make at least one call to ring_buffer_read_prepare_sync. | ||
3345 | * Afterwards, ring_buffer_read_start is invoked to get things going | ||
3346 | * for real. | ||
3347 | * | ||
3348 | * Overall, this must be paired with ring_buffer_finish. | ||
3259 | */ | 3349 | */ |
3260 | struct ring_buffer_iter * | 3350 | struct ring_buffer_iter * |
3261 | ring_buffer_read_start(struct ring_buffer *buffer, int cpu) | 3351 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) |
3262 | { | 3352 | { |
3263 | struct ring_buffer_per_cpu *cpu_buffer; | 3353 | struct ring_buffer_per_cpu *cpu_buffer; |
3264 | struct ring_buffer_iter *iter; | 3354 | struct ring_buffer_iter *iter; |
3265 | unsigned long flags; | ||
3266 | 3355 | ||
3267 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3356 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3268 | return NULL; | 3357 | return NULL; |
@@ -3276,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) | |||
3276 | iter->cpu_buffer = cpu_buffer; | 3365 | iter->cpu_buffer = cpu_buffer; |
3277 | 3366 | ||
3278 | atomic_inc(&cpu_buffer->record_disabled); | 3367 | atomic_inc(&cpu_buffer->record_disabled); |
3368 | |||
3369 | return iter; | ||
3370 | } | ||
3371 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); | ||
3372 | |||
3373 | /** | ||
3374 | * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls | ||
3375 | * | ||
3376 | * All previously invoked ring_buffer_read_prepare calls to prepare | ||
3377 | * iterators will be synchronized. Afterwards, ring_buffer_read_start | ||
3378 | * calls on those iterators are allowed. | ||
3379 | */ | ||
3380 | void | ||
3381 | ring_buffer_read_prepare_sync(void) | ||
3382 | { | ||
3279 | synchronize_sched(); | 3383 | synchronize_sched(); |
3384 | } | ||
3385 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); | ||
3386 | |||
3387 | /** | ||
3388 | * ring_buffer_read_start - start a non consuming read of the buffer | ||
3389 | * @iter: The iterator returned by ring_buffer_read_prepare | ||
3390 | * | ||
3391 | * This finalizes the startup of an iteration through the buffer. | ||
3392 | * The iterator comes from a call to ring_buffer_read_prepare and | ||
3393 | * an intervening ring_buffer_read_prepare_sync must have been | ||
3394 | * performed. | ||
3395 | * | ||
3396 | * Must be paired with ring_buffer_finish. | ||
3397 | */ | ||
3398 | void | ||
3399 | ring_buffer_read_start(struct ring_buffer_iter *iter) | ||
3400 | { | ||
3401 | struct ring_buffer_per_cpu *cpu_buffer; | ||
3402 | unsigned long flags; | ||
3403 | |||
3404 | if (!iter) | ||
3405 | return; | ||
3406 | |||
3407 | cpu_buffer = iter->cpu_buffer; | ||
3280 | 3408 | ||
3281 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3409 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3282 | arch_spin_lock(&cpu_buffer->lock); | 3410 | arch_spin_lock(&cpu_buffer->lock); |
3283 | rb_iter_reset(iter); | 3411 | rb_iter_reset(iter); |
3284 | arch_spin_unlock(&cpu_buffer->lock); | 3412 | arch_spin_unlock(&cpu_buffer->lock); |
3285 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3413 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3286 | |||
3287 | return iter; | ||
3288 | } | 3414 | } |
3289 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); | 3415 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); |
3290 | 3416 | ||
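The split into prepare / prepare_sync / start exists so that several per-CPU iterators can share a single synchronize_sched() instead of paying for one per CPU. A caller-side sketch is shown below; the iters[] array, the function name and the lack of error handling are assumptions for illustration, and each iterator still has to be released afterwards with the buffer's read-finish call, as the comment above notes.

	#include <linux/ring_buffer.h>
	#include <linux/cpumask.h>

	/* Hypothetical setup: one non-consuming iterator per online CPU. */
	static struct ring_buffer_iter *iters[NR_CPUS];

	static void start_all_iterators(struct ring_buffer *buffer)
	{
		int cpu;

		/* Phase 1: allocate iterators; recording is disabled per CPU. */
		for_each_online_cpu(cpu)
			iters[cpu] = ring_buffer_read_prepare(buffer, cpu);

		/* Phase 2: one synchronize_sched() covers every prepared CPU. */
		ring_buffer_read_prepare_sync();

		/* Phase 3: reset each iterator under its reader lock. */
		for_each_online_cpu(cpu)
			ring_buffer_read_start(iters[cpu]);
	}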
@@ -3378,6 +3504,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
3378 | cpu_buffer->write_stamp = 0; | 3504 | cpu_buffer->write_stamp = 0; |
3379 | cpu_buffer->read_stamp = 0; | 3505 | cpu_buffer->read_stamp = 0; |
3380 | 3506 | ||
3507 | cpu_buffer->lost_events = 0; | ||
3508 | cpu_buffer->last_overrun = 0; | ||
3509 | |||
3381 | rb_head_page_activate(cpu_buffer); | 3510 | rb_head_page_activate(cpu_buffer); |
3382 | } | 3511 | } |
3383 | 3512 | ||
@@ -3653,6 +3782,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3653 | struct ring_buffer_event *event; | 3782 | struct ring_buffer_event *event; |
3654 | struct buffer_data_page *bpage; | 3783 | struct buffer_data_page *bpage; |
3655 | struct buffer_page *reader; | 3784 | struct buffer_page *reader; |
3785 | unsigned long missed_events; | ||
3656 | unsigned long flags; | 3786 | unsigned long flags; |
3657 | unsigned int commit; | 3787 | unsigned int commit; |
3658 | unsigned int read; | 3788 | unsigned int read; |
@@ -3689,6 +3819,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3689 | read = reader->read; | 3819 | read = reader->read; |
3690 | commit = rb_page_commit(reader); | 3820 | commit = rb_page_commit(reader); |
3691 | 3821 | ||
3822 | /* Check if any events were dropped */ | ||
3823 | missed_events = cpu_buffer->lost_events; | ||
3824 | |||
3692 | /* | 3825 | /* |
3693 | * If this page has been partially read or | 3826 | * If this page has been partially read or |
3694 | * if len is not big enough to read the rest of the page or | 3827 | * if len is not big enough to read the rest of the page or |
@@ -3709,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3709 | if (len > (commit - read)) | 3842 | if (len > (commit - read)) |
3710 | len = (commit - read); | 3843 | len = (commit - read); |
3711 | 3844 | ||
3712 | size = rb_event_length(event); | 3845 | /* Always keep the time extend and data together */ |
3846 | size = rb_event_ts_length(event); | ||
3713 | 3847 | ||
3714 | if (len < size) | 3848 | if (len < size) |
3715 | goto out_unlock; | 3849 | goto out_unlock; |
@@ -3719,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3719 | 3853 | ||
3720 | /* Need to copy one event at a time */ | 3854 | /* Need to copy one event at a time */ |
3721 | do { | 3855 | do { |
3856 | /* We need the size of one event, because | ||
3857 | * rb_advance_reader only advances by one event, | ||
3858 | * whereas rb_event_ts_length may include the size of | ||
3859 | * one or two events. | ||
3860 | * We have already ensured there's enough space if this | ||
3861 | * is a time extend. */ | ||
3862 | size = rb_event_length(event); | ||
3722 | memcpy(bpage->data + pos, rpage->data + rpos, size); | 3863 | memcpy(bpage->data + pos, rpage->data + rpos, size); |
3723 | 3864 | ||
3724 | len -= size; | 3865 | len -= size; |
@@ -3727,9 +3868,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3727 | rpos = reader->read; | 3868 | rpos = reader->read; |
3728 | pos += size; | 3869 | pos += size; |
3729 | 3870 | ||
3871 | if (rpos >= commit) | ||
3872 | break; | ||
3873 | |||
3730 | event = rb_reader_event(cpu_buffer); | 3874 | event = rb_reader_event(cpu_buffer); |
3731 | size = rb_event_length(event); | 3875 | /* Always keep the time extend and data together */ |
3732 | } while (len > size); | 3876 | size = rb_event_ts_length(event); |
3877 | } while (len >= size); | ||
3733 | 3878 | ||
3734 | /* update bpage */ | 3879 | /* update bpage */ |
3735 | local_set(&bpage->commit, pos); | 3880 | local_set(&bpage->commit, pos); |
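rb_event_ts_length() is defined earlier in this file and its body is not shown in this hunk. As a rough sketch of the behavior the copy loop relies on (an approximation for illustration, not the exact code): a time-extend record is always immediately followed by the data event it belongs to, so the "does it fit" check must cover both records, while rb_event_length() and rb_advance_reader() still move one record at a time.

	/* Approximate behavior of the ts-aware length helper (sketch only). */
	static inline unsigned
	rb_event_ts_length_sketch(struct ring_buffer_event *event)
	{
		unsigned len = 0;

		if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
			/*
			 * Count the extend record, then measure the data
			 * event that immediately follows it.
			 */
			len = RB_LEN_TIME_EXTEND;
			event = skip_time_extend(event);
		}
		return len + rb_event_length(event);
	}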
@@ -3749,9 +3894,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3749 | local_set(&reader->entries, 0); | 3894 | local_set(&reader->entries, 0); |
3750 | reader->read = 0; | 3895 | reader->read = 0; |
3751 | *data_page = bpage; | 3896 | *data_page = bpage; |
3897 | |||
3898 | /* | ||
3899 | * Use the real_end for the data size, | ||
3900 | * This gives us a chance to store the lost events | ||
3901 | * on the page. | ||
3902 | */ | ||
3903 | if (reader->real_end) | ||
3904 | local_set(&bpage->commit, reader->real_end); | ||
3752 | } | 3905 | } |
3753 | ret = read; | 3906 | ret = read; |
3754 | 3907 | ||
3908 | cpu_buffer->lost_events = 0; | ||
3909 | |||
3910 | commit = local_read(&bpage->commit); | ||
3911 | /* | ||
3912 | * Set a flag in the commit field if we lost events | ||
3913 | */ | ||
3914 | if (missed_events) { | ||
3915 | /* If there is room at the end of the page to save the | ||
3916 | * missed events, then record it there. | ||
3917 | */ | ||
3918 | if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { | ||
3919 | memcpy(&bpage->data[commit], &missed_events, | ||
3920 | sizeof(missed_events)); | ||
3921 | local_add(RB_MISSED_STORED, &bpage->commit); | ||
3922 | commit += sizeof(missed_events); | ||
3923 | } | ||
3924 | local_add(RB_MISSED_EVENTS, &bpage->commit); | ||
3925 | } | ||
3926 | |||
3927 | /* | ||
3928 | * This page may be off to user land. Zero it out here. | ||
3929 | */ | ||
3930 | if (commit < BUF_PAGE_SIZE) | ||
3931 | memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); | ||
3932 | |||
3755 | out_unlock: | 3933 | out_unlock: |
3756 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3934 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3757 | 3935 | ||
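A reader of the page returned by ring_buffer_read_page() can recover the drop information from the commit word. The sketch below shows one way to decode it; the assumptions are that RB_MISSED_EVENTS and RB_MISSED_STORED are the same flag macros used above, that masking them off leaves the data size, and that the helper name is hypothetical.

	/* Hypothetical in-kernel consumer of a page filled by ring_buffer_read_page(). */
	static void report_missed_events(struct buffer_data_page *bpage)
	{
		unsigned long commit = local_read(&bpage->commit);
		/* Assumption: clearing the two flag bits leaves the data size. */
		unsigned long size = commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);
		unsigned long missed = 0;

		if (!(commit & RB_MISSED_EVENTS))
			return;

		/* The count, when it fit, was stored right after the event data. */
		if (commit & RB_MISSED_STORED)
			memcpy(&missed, &bpage->data[size], sizeof(missed));

		if (missed)
			pr_info("read page dropped %lu events\n", missed);
		else
			pr_info("read page dropped an unknown number of events\n");
	}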
@@ -3812,6 +3990,7 @@ static const struct file_operations rb_simple_fops = { | |||
3812 | .open = tracing_open_generic, | 3990 | .open = tracing_open_generic, |
3813 | .read = rb_simple_read, | 3991 | .read = rb_simple_read, |
3814 | .write = rb_simple_write, | 3992 | .write = rb_simple_write, |
3993 | .llseek = default_llseek, | ||
3815 | }; | 3994 | }; |
3816 | 3995 | ||
3817 | 3996 | ||