author     Ingo Molnar <mingo@elte.hu>    2008-11-20 03:03:38 -0500
committer  Ingo Molnar <mingo@elte.hu>    2008-11-20 03:03:38 -0500
commit     90accd6fabf9b2fa2705945a4c601877a75d43bf (patch)
tree       d393cb54f0228b1313139e4e14adf4f5cf236b59 /kernel/trace/ring_buffer.c
parent     b43d196c4d3fe46d6dda7c987c47792612b80b1b (diff)
parent     ee2f6cc7f9ea2542ad46070ed62ba7aa04d08871 (diff)
Merge branch 'linus' into x86/memory-corruption-check
Diffstat (limited to 'kernel/trace/ring_buffer.c')
 kernel/trace/ring_buffer.c (-rw-r--r--)   179
1 files changed, 175 insertions, 4 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb4..036456cbb4f7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
 
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+        ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+        ring_buffers_off = 1;
+}
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+        u64 time;
+
+        preempt_disable_notrace();
         /* shift to debug/test normalization and TIME_EXTENTS */
-        return sched_clock() << DEBUG_SHIFT;
+        time = sched_clock() << DEBUG_SHIFT;
+        preempt_enable_notrace();
+
+        return time;
 }
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
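The tracing_on()/tracing_off() pair introduced above is meant to be called from elsewhere in the kernel, typically to freeze the ring buffers the moment a problem is detected so that the trace leading up to it is not overwritten. A minimal sketch of that usage, with a hypothetical detection site (only tracing_on() and tracing_off() come from this patch):

    /* Hypothetical caller: stop all ring-buffer recording when trouble is
     * detected, so the events leading up to it survive for inspection. */
    static void check_for_corruption(int corrupted)
    {
            if (corrupted) {
                    tracing_off();  /* subsequent reserve/write calls are refused */
                    WARN_ON(1);     /* leave a marker in the kernel log */
            }
    }

    /* After the problem has been analysed or recovered from: */
    static void resume_recording(void)
    {
            tracing_on();
    }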
@@ -130,7 +165,7 @@ struct buffer_page {
 static inline void free_buffer_page(struct buffer_page *bpage)
 {
         if (bpage->page)
-                __free_page(bpage->page);
+                free_page((unsigned long)bpage->page);
         kfree(bpage);
 }
 
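The one-line change above is a type fix: the cast implies that bpage->page holds a page's kernel address rather than a struct page pointer, so it has to be released with free_page() on that address instead of __free_page() on a struct page. A minimal sketch of the matching pair, assuming the allocation side stores the address returned by __get_free_page() (that side is not shown in this hunk):

    /* Allocation and free must agree on what bpage->page holds:
     * a page address, not a struct page *. */
    unsigned long addr = __get_free_page(GFP_KERNEL);
    if (!addr)
            return -ENOMEM;
    bpage->page = (void *)addr;

    /* ... later, in free_buffer_page(): */
    free_page((unsigned long)bpage->page);  /* not __free_page(bpage->page) */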
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
        LIST_HEAD(pages);
        int i, cpu;
 
+       /*
+        * Always succeed at resizing a non-existent buffer:
+        */
+       if (!buffer)
+               return size;
+
        size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        size *= BUF_PAGE_SIZE;
        buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -966,7 +1007,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
        if (unlikely(*delta > (1ULL << 59) && !once++)) {
                printk(KERN_WARNING "Delta way too big! %llu"
                       " ts=%llu write stamp = %llu\n",
-                      *delta, *ts, cpu_buffer->write_stamp);
+                      (unsigned long long)*delta,
+                      (unsigned long long)*ts,
+                      (unsigned long long)cpu_buffer->write_stamp);
                WARN_ON(1);
        }
 
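The casts added above are a printk format fix rather than a behavioural change: u64 is defined as unsigned long on some 64-bit architectures, so passing it straight to a %llu conversion produces format warnings there. Casting each argument to unsigned long long keeps the single format string correct everywhere, e.g.:

    u64 ts = ring_buffer_time_stamp(cpu);
    printk(KERN_INFO "ts=%llu\n", (unsigned long long)ts); /* portable on 32- and 64-bit */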
@@ -1020,8 +1063,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
        struct ring_buffer_event *event;
        u64 ts, delta;
        int commit = 0;
+       int nr_loops = 0;
 
  again:
+       /*
+        * We allow for interrupts to reenter here and do a trace.
+        * If one does, it will cause this original code to loop
+        * back here. Even with heavy interrupts happening, this
+        * should only happen a few times in a row. If this happens
+        * 1000 times in a row, there must be either an interrupt
+        * storm or we have something buggy.
+        * Bail!
+        */
+       if (unlikely(++nr_loops > 1000)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               return NULL;
+       }
+
        ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
        /*
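The nr_loops counter added here is a defensive bound on an optimistic retry loop: the code may loop back to again: when an interrupt sneaks in and records its own event between the timestamp read and the commit, but that can legitimately happen only as often as interrupts can nest, so anything beyond a generous limit is treated as a bug or an interrupt storm and the reservation is abandoned with a warning instead of livelocking. The same bound, with smaller limits, reappears in the reader paths further down. A kernel-style sketch of the bare pattern, relying on unlikely() and WARN_ON() from the usual kernel headers (try_reserve_once() and MAX_RETRIES are illustrative names, not part of this patch):

    #define MAX_RETRIES 1000                /* far above any plausible interrupt-nesting depth */

    extern void *try_reserve_once(void);    /* hypothetical optimistic operation */

    static void *reserve_with_bound(void)
    {
            int nr_loops = 0;
            void *slot;

     again:
            /* Give up loudly instead of spinning forever when something is wrong. */
            if (unlikely(++nr_loops > MAX_RETRIES)) {
                    WARN_ON(1);
                    return NULL;
            }

            slot = try_reserve_once();      /* may fail if a nested writer raced us */
            if (!slot)
                    goto again;

            return slot;
    }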
@@ -1043,7 +1101,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
        /* Did the write stamp get updated already? */
        if (unlikely(ts < cpu_buffer->write_stamp))
-               goto again;
+               delta = 0;
 
        if (test_time_stamp(delta)) {
 
@@ -1116,6 +1174,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        int cpu, resched;
 
+       if (ring_buffers_off)
+               return NULL;
+
        if (atomic_read(&buffer->record_disabled))
                return NULL;
 
@@ -1232,6 +1293,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
        int ret = -EBUSY;
        int cpu, resched;
 
+       if (ring_buffers_off)
+               return -EBUSY;
+
        if (atomic_read(&buffer->record_disabled))
                return -EBUSY;
 
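With the two early exits above, every producer has to tolerate recording being refused while tracing is off: ring_buffer_lock_reserve() signals it by returning NULL and ring_buffer_write() by returning -EBUSY. A hedged sketch of a caller that simply drops its event in that case, assuming the three-argument ring_buffer_write(buffer, length, data) form of this era (record_sample() itself is made up for illustration):

    /* Hypothetical producer: treat "tracing is off" as a quiet drop,
     * not a hard error. */
    static int record_sample(struct ring_buffer *buffer, u64 value)
    {
            int ret;

            ret = ring_buffer_write(buffer, sizeof(value), &value);
            if (ret == -EBUSY)
                    return 0;       /* recording disabled: drop the sample */

            return ret;
    }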
@@ -1530,10 +1594,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
        struct buffer_page *reader = NULL;
        unsigned long flags;
+       int nr_loops = 0;
 
        spin_lock_irqsave(&cpu_buffer->lock, flags);
 
  again:
+       /*
+        * This should normally only loop twice. But because the
+        * start of the reader inserts an empty page, it causes
+        * a case where we will loop three times. There should be no
+        * reason to loop four times (that I know of).
+        */
+       if (unlikely(++nr_loops > 3)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               reader = NULL;
+               goto out;
+       }
+
        reader = cpu_buffer->reader_page;
 
        /* If there's more to read, return this page */
@@ -1663,6 +1740,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
        struct buffer_page *reader;
+       int nr_loops = 0;
 
        if (!cpu_isset(cpu, buffer->cpumask))
                return NULL;
@@ -1670,6 +1748,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        cpu_buffer = buffer->buffers[cpu];
 
  again:
+       /*
+        * We repeat when a timestamp is encountered. It is possible
+        * to get multiple timestamps from an interrupt entering just
+        * as one timestamp is about to be written. The max times
+        * that this can happen is the number of nested interrupts we
+        * can have. Nesting 10 deep of interrupts is clearly
+        * an anomaly.
+        */
+       if (unlikely(++nr_loops > 10)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               return NULL;
+       }
+
        reader = rb_get_reader_page(cpu_buffer);
        if (!reader)
                return NULL;
@@ -1720,6 +1811,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        struct ring_buffer *buffer;
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
+       int nr_loops = 0;
 
        if (ring_buffer_iter_empty(iter))
                return NULL;
@@ -1728,6 +1820,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        buffer = cpu_buffer->buffer;
 
  again:
+       /*
+        * We repeat when a timestamp is encountered. It is possible
+        * to get multiple timestamps from an interrupt entering just
+        * as one timestamp is about to be written. The max times
+        * that this can happen is the number of nested interrupts we
+        * can have. Nesting 10 deep of interrupts is clearly
+        * an anomaly.
+        */
+       if (unlikely(++nr_loops > 10)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               return NULL;
+       }
+
        if (rb_per_cpu_empty(cpu_buffer))
                return NULL;
 
@@ -2012,3 +2117,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
        return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+              size_t cnt, loff_t *ppos)
+{
+       int *p = filp->private_data;
+       char buf[64];
+       int r;
+
+       /* !ring_buffers_off == tracing_on */
+       r = sprintf(buf, "%d\n", !*p);
+
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+               size_t cnt, loff_t *ppos)
+{
+       int *p = filp->private_data;
+       char buf[64];
+       long val;
+       int ret;
+
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(&buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       ret = strict_strtoul(buf, 10, &val);
+       if (ret < 0)
+               return ret;
+
+       /* !ring_buffers_off == tracing_on */
+       *p = !val;
+
+       (*ppos)++;
+
+       return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+       .open           = tracing_open_generic,
+       .read           = rb_simple_read,
+       .write          = rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+       struct dentry *d_tracer;
+       struct dentry *entry;
+
+       d_tracer = tracing_init_dentry();
+
+       entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+                                   &ring_buffers_off, &rb_simple_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+       return 0;
+}
+
+fs_initcall(rb_init_debugfs);
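The debugfs hook at the end exposes the same switch to user space: the file stores the logical inverse of ring_buffers_off, so reading it reports 1 while recording is enabled, and writing 0 or 1 turns recording off or on. With debugfs mounted in the usual place that means something like `echo 0 > /sys/kernel/debug/tracing/tracing_on` to stop recording and `echo 1` to the same file to resume; the exact path depends on where debugfs is mounted and on the directory returned by tracing_init_dentry().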
