aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <rostedt@goodmis.org>2008-10-31 09:58:35 -0400
committerIngo Molnar <mingo@elte.hu>2008-11-03 05:10:04 -0500
commit818e3dd30a4ff34fff6d90e87ae59c73f6a53691 (patch)
treeab9db9dec53a0c0383476c8dfca17d75c83317f6
parentb3aa557722b3d5858f14ca559e03461c24125aaf (diff)
tracing, ring-buffer: add paranoid checks for loops
While writing a new tracer, I had a bug where I caused the ring-buffer to recurse in a bad way. The bug was with the tracer I was writing and not the ring-buffer itself. But it took a long time to find the problem. This patch adds paranoid checks into the ring-buffer infrastructure that will catch bugs of this nature. Note: I put the bug back in the tracer and this patch showed the error nicely and prevented the lockup. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/trace/ring_buffer.c56
1 files changed, 56 insertions, 0 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cedf4e26828..3f338063864 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1022,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1022 struct ring_buffer_event *event; 1022 struct ring_buffer_event *event;
1023 u64 ts, delta; 1023 u64 ts, delta;
1024 int commit = 0; 1024 int commit = 0;
1025 int nr_loops = 0;
1025 1026
1026 again: 1027 again:
1028 /*
1029 * We allow for interrupts to reenter here and do a trace.
1030 * If one does, it will cause this original code to loop
1031 * back here. Even with heavy interrupts happening, this
1032 * should only happen a few times in a row. If this happens
1033 * 1000 times in a row, there must be either an interrupt
1034 * storm or we have something buggy.
1035 * Bail!
1036 */
1037 if (unlikely(++nr_loops > 1000)) {
1038 RB_WARN_ON(cpu_buffer, 1);
1039 return NULL;
1040 }
1041
1027 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1042 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1028 1043
1029 /* 1044 /*
@@ -1532,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1532{ 1547{
1533 struct buffer_page *reader = NULL; 1548 struct buffer_page *reader = NULL;
1534 unsigned long flags; 1549 unsigned long flags;
1550 int nr_loops = 0;
1535 1551
1536 spin_lock_irqsave(&cpu_buffer->lock, flags); 1552 spin_lock_irqsave(&cpu_buffer->lock, flags);
1537 1553
1538 again: 1554 again:
1555 /*
1556 * This should normally only loop twice. But because the
1557 * start of the reader inserts an empty page, it causes
1558 * a case where we will loop three times. There should be no
1559 * reason to loop four times (that I know of).
1560 */
1561 if (unlikely(++nr_loops > 3)) {
1562 RB_WARN_ON(cpu_buffer, 1);
1563 reader = NULL;
1564 goto out;
1565 }
1566
1539 reader = cpu_buffer->reader_page; 1567 reader = cpu_buffer->reader_page;
1540 1568
1541 /* If there's more to read, return this page */ 1569 /* If there's more to read, return this page */
@@ -1665,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1665 struct ring_buffer_per_cpu *cpu_buffer; 1693 struct ring_buffer_per_cpu *cpu_buffer;
1666 struct ring_buffer_event *event; 1694 struct ring_buffer_event *event;
1667 struct buffer_page *reader; 1695 struct buffer_page *reader;
1696 int nr_loops = 0;
1668 1697
1669 if (!cpu_isset(cpu, buffer->cpumask)) 1698 if (!cpu_isset(cpu, buffer->cpumask))
1670 return NULL; 1699 return NULL;
@@ -1672,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1672 cpu_buffer = buffer->buffers[cpu]; 1701 cpu_buffer = buffer->buffers[cpu];
1673 1702
1674 again: 1703 again:
1704 /*
1705 * We repeat when a timestamp is encountered. It is possible
1706 * to get multiple timestamps from an interrupt entering just
1707 * as one timestamp is about to be written. The max times
1708 * that this can happen is the number of nested interrupts we
1709 * can have. Nesting 10 deep of interrupts is clearly
1710 * an anomaly.
1711 */
1712 if (unlikely(++nr_loops > 10)) {
1713 RB_WARN_ON(cpu_buffer, 1);
1714 return NULL;
1715 }
1716
1675 reader = rb_get_reader_page(cpu_buffer); 1717 reader = rb_get_reader_page(cpu_buffer);
1676 if (!reader) 1718 if (!reader)
1677 return NULL; 1719 return NULL;
@@ -1722,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1722 struct ring_buffer *buffer; 1764 struct ring_buffer *buffer;
1723 struct ring_buffer_per_cpu *cpu_buffer; 1765 struct ring_buffer_per_cpu *cpu_buffer;
1724 struct ring_buffer_event *event; 1766 struct ring_buffer_event *event;
1767 int nr_loops = 0;
1725 1768
1726 if (ring_buffer_iter_empty(iter)) 1769 if (ring_buffer_iter_empty(iter))
1727 return NULL; 1770 return NULL;
@@ -1730,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1730 buffer = cpu_buffer->buffer; 1773 buffer = cpu_buffer->buffer;
1731 1774
1732 again: 1775 again:
1776 /*
1777 * We repeat when a timestamp is encountered. It is possible
1778 * to get multiple timestamps from an interrupt entering just
1779 * as one timestamp is about to be written. The max times
1780 * that this can happen is the number of nested interrupts we
1781 * can have. Nesting 10 deep of interrupts is clearly
1782 * an anomaly.
1783 */
1784 if (unlikely(++nr_loops > 10)) {
1785 RB_WARN_ON(cpu_buffer, 1);
1786 return NULL;
1787 }
1788
1733 if (rb_per_cpu_empty(cpu_buffer)) 1789 if (rb_per_cpu_empty(cpu_buffer))
1734 return NULL; 1790 return NULL;
1735 1791