diff options
author | Steven Rostedt <srostedt@redhat.com> | 2009-09-01 11:06:29 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2009-09-04 18:44:22 -0400 |
commit | 2f26ebd549b9ab55ac756b836ec759c11fe93f81 (patch) | |
tree | 0f6fb154e2b5e9233a683b3267e5efe862a846f4 /kernel/trace/trace.c | |
parent | 76f0d07376388f32698ba51b6090a26b90c1342f (diff) |
tracing: use timestamp to determine start of latency traces
Currently the latency tracers reset the ring buffer. Unfortunately
if a commit is in progress (due to a trace event), this can corrupt
the ring buffer. When this happens, the ring buffer will detect
the corruption and then permanently disable the ring buffer.
The bug does not crash the system, but it does prevent further tracing
after the bug is hit.
Instead of resetting the trace buffers, the timestamp of the start of
the trace is used instead. The buffers will still contain the previous
data, but the output will not count any data that is before the
timestamp of the trace.
Note, this only affects the static trace output (trace) and not the
runtime trace output (trace_pipe). The runtime trace output does not
make sense for the latency tracers anyway.
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r-- | kernel/trace/trace.c | 80 |
1 files changed, 63 insertions, 17 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 54517a889791..7daf372e319a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -454,10 +454,6 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
454 | tr->buffer = max_tr.buffer; | 454 | tr->buffer = max_tr.buffer; |
455 | max_tr.buffer = buf; | 455 | max_tr.buffer = buf; |
456 | 456 | ||
457 | ftrace_disable_cpu(); | ||
458 | ring_buffer_reset(tr->buffer); | ||
459 | ftrace_enable_cpu(); | ||
460 | |||
461 | __update_max_tr(tr, tsk, cpu); | 457 | __update_max_tr(tr, tsk, cpu); |
462 | __raw_spin_unlock(&ftrace_max_lock); | 458 | __raw_spin_unlock(&ftrace_max_lock); |
463 | } | 459 | } |
@@ -483,7 +479,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
483 | 479 | ||
484 | ftrace_disable_cpu(); | 480 | ftrace_disable_cpu(); |
485 | 481 | ||
486 | ring_buffer_reset(max_tr.buffer); | ||
487 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); | 482 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); |
488 | 483 | ||
489 | ftrace_enable_cpu(); | 484 | ftrace_enable_cpu(); |
@@ -1374,6 +1369,37 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) | |||
1374 | return ent; | 1369 | return ent; |
1375 | } | 1370 | } |
1376 | 1371 | ||
1372 | static void tracing_iter_reset(struct trace_iterator *iter, int cpu) | ||
1373 | { | ||
1374 | struct trace_array *tr = iter->tr; | ||
1375 | struct ring_buffer_event *event; | ||
1376 | struct ring_buffer_iter *buf_iter; | ||
1377 | unsigned long entries = 0; | ||
1378 | u64 ts; | ||
1379 | |||
1380 | tr->data[cpu]->skipped_entries = 0; | ||
1381 | |||
1382 | if (!iter->buffer_iter[cpu]) | ||
1383 | return; | ||
1384 | |||
1385 | buf_iter = iter->buffer_iter[cpu]; | ||
1386 | ring_buffer_iter_reset(buf_iter); | ||
1387 | |||
1388 | /* | ||
1389 | * We could have the case with the max latency tracers | ||
1390 | * that a reset never took place on a cpu. This is evident | ||
1391 | * by the timestamp being before the start of the buffer. | ||
1392 | */ | ||
1393 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { | ||
1394 | if (ts >= iter->tr->time_start) | ||
1395 | break; | ||
1396 | entries++; | ||
1397 | ring_buffer_read(buf_iter, NULL); | ||
1398 | } | ||
1399 | |||
1400 | tr->data[cpu]->skipped_entries = entries; | ||
1401 | } | ||
1402 | |||
1377 | /* | 1403 | /* |
1378 | * No necessary locking here. The worst thing which can | 1404 | * No necessary locking here. The worst thing which can |
1379 | * happen is loosing events consumed at the same time | 1405 | * happen is loosing events consumed at the same time |
@@ -1412,10 +1438,9 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
1412 | 1438 | ||
1413 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | 1439 | if (cpu_file == TRACE_PIPE_ALL_CPU) { |
1414 | for_each_tracing_cpu(cpu) | 1440 | for_each_tracing_cpu(cpu) |
1415 | ring_buffer_iter_reset(iter->buffer_iter[cpu]); | 1441 | tracing_iter_reset(iter, cpu); |
1416 | } else | 1442 | } else |
1417 | ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); | 1443 | tracing_iter_reset(iter, cpu_file); |
1418 | |||
1419 | 1444 | ||
1420 | ftrace_enable_cpu(); | 1445 | ftrace_enable_cpu(); |
1421 | 1446 | ||
@@ -1464,16 +1489,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) | |||
1464 | struct trace_array *tr = iter->tr; | 1489 | struct trace_array *tr = iter->tr; |
1465 | struct trace_array_cpu *data = tr->data[tr->cpu]; | 1490 | struct trace_array_cpu *data = tr->data[tr->cpu]; |
1466 | struct tracer *type = current_trace; | 1491 | struct tracer *type = current_trace; |
1467 | unsigned long total; | 1492 | unsigned long entries = 0; |
1468 | unsigned long entries; | 1493 | unsigned long total = 0; |
1494 | unsigned long count; | ||
1469 | const char *name = "preemption"; | 1495 | const char *name = "preemption"; |
1496 | int cpu; | ||
1470 | 1497 | ||
1471 | if (type) | 1498 | if (type) |
1472 | name = type->name; | 1499 | name = type->name; |
1473 | 1500 | ||
1474 | entries = ring_buffer_entries(iter->tr->buffer); | 1501 | |
1475 | total = entries + | 1502 | for_each_tracing_cpu(cpu) { |
1476 | ring_buffer_overruns(iter->tr->buffer); | 1503 | count = ring_buffer_entries_cpu(tr->buffer, cpu); |
1504 | /* | ||
1505 | * If this buffer has skipped entries, then we hold all | ||
1506 | * entries for the trace and we need to ignore the | ||
1507 | * ones before the time stamp. | ||
1508 | */ | ||
1509 | if (tr->data[cpu]->skipped_entries) { | ||
1510 | count -= tr->data[cpu]->skipped_entries; | ||
1511 | /* total is the same as the entries */ | ||
1512 | total += count; | ||
1513 | } else | ||
1514 | total += count + | ||
1515 | ring_buffer_overrun_cpu(tr->buffer, cpu); | ||
1516 | entries += count; | ||
1517 | } | ||
1477 | 1518 | ||
1478 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", | 1519 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", |
1479 | name, UTS_RELEASE); | 1520 | name, UTS_RELEASE); |
@@ -1534,6 +1575,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter) | |||
1534 | if (cpumask_test_cpu(iter->cpu, iter->started)) | 1575 | if (cpumask_test_cpu(iter->cpu, iter->started)) |
1535 | return; | 1576 | return; |
1536 | 1577 | ||
1578 | if (iter->tr->data[iter->cpu]->skipped_entries) | ||
1579 | return; | ||
1580 | |||
1537 | cpumask_set_cpu(iter->cpu, iter->started); | 1581 | cpumask_set_cpu(iter->cpu, iter->started); |
1538 | 1582 | ||
1539 | /* Don't print started cpu buffer for the first entry of the trace */ | 1583 | /* Don't print started cpu buffer for the first entry of the trace */ |
@@ -1796,19 +1840,23 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1796 | if (ring_buffer_overruns(iter->tr->buffer)) | 1840 | if (ring_buffer_overruns(iter->tr->buffer)) |
1797 | iter->iter_flags |= TRACE_FILE_ANNOTATE; | 1841 | iter->iter_flags |= TRACE_FILE_ANNOTATE; |
1798 | 1842 | ||
1843 | /* stop the trace while dumping */ | ||
1844 | tracing_stop(); | ||
1845 | |||
1799 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { | 1846 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { |
1800 | for_each_tracing_cpu(cpu) { | 1847 | for_each_tracing_cpu(cpu) { |
1801 | 1848 | ||
1802 | iter->buffer_iter[cpu] = | 1849 | iter->buffer_iter[cpu] = |
1803 | ring_buffer_read_start(iter->tr->buffer, cpu); | 1850 | ring_buffer_read_start(iter->tr->buffer, cpu); |
1851 | tracing_iter_reset(iter, cpu); | ||
1804 | } | 1852 | } |
1805 | } else { | 1853 | } else { |
1806 | cpu = iter->cpu_file; | 1854 | cpu = iter->cpu_file; |
1807 | iter->buffer_iter[cpu] = | 1855 | iter->buffer_iter[cpu] = |
1808 | ring_buffer_read_start(iter->tr->buffer, cpu); | 1856 | ring_buffer_read_start(iter->tr->buffer, cpu); |
1857 | tracing_iter_reset(iter, cpu); | ||
1809 | } | 1858 | } |
1810 | 1859 | ||
1811 | /* TODO stop tracer */ | ||
1812 | ret = seq_open(file, &tracer_seq_ops); | 1860 | ret = seq_open(file, &tracer_seq_ops); |
1813 | if (ret < 0) { | 1861 | if (ret < 0) { |
1814 | fail_ret = ERR_PTR(ret); | 1862 | fail_ret = ERR_PTR(ret); |
@@ -1818,9 +1866,6 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1818 | m = file->private_data; | 1866 | m = file->private_data; |
1819 | m->private = iter; | 1867 | m->private = iter; |
1820 | 1868 | ||
1821 | /* stop the trace while dumping */ | ||
1822 | tracing_stop(); | ||
1823 | |||
1824 | mutex_unlock(&trace_types_lock); | 1869 | mutex_unlock(&trace_types_lock); |
1825 | 1870 | ||
1826 | return iter; | 1871 | return iter; |
@@ -1831,6 +1876,7 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1831 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 1876 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
1832 | } | 1877 | } |
1833 | free_cpumask_var(iter->started); | 1878 | free_cpumask_var(iter->started); |
1879 | tracing_start(); | ||
1834 | fail: | 1880 | fail: |
1835 | mutex_unlock(&trace_types_lock); | 1881 | mutex_unlock(&trace_types_lock); |
1836 | kfree(iter->trace); | 1882 | kfree(iter->trace); |