author     Steven Rostedt <srostedt@redhat.com>     2009-09-01 11:06:29 -0400
committer  Steven Rostedt <rostedt@goodmis.org>     2009-09-04 18:44:22 -0400
commit     2f26ebd549b9ab55ac756b836ec759c11fe93f81 (patch)
tree       0f6fb154e2b5e9233a683b3267e5efe862a846f4 /kernel/trace
parent     76f0d07376388f32698ba51b6090a26b90c1342f (diff)
tracing: use timestamp to determine start of latency traces
Currently the latency tracers reset the ring buffer. Unfortunately,
if a commit is in progress (due to a trace event), this can corrupt
the ring buffer. When that happens, the ring buffer detects the
corruption and then permanently disables itself.
The bug does not crash the system, but it does prevent further tracing
after the bug is hit.
Instead of resetting the trace buffers, the timestamp of the start of
the trace is now used. The buffers will still contain the previous
data, but the output will not count any data recorded before that
timestamp.
Note, this only affects the static trace output (trace) and not the
runtime trace output (trace_pipe). The runtime trace output does not
make sense for the latency tracers anyway.
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
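
For illustration only (not part of the patch): the new tracing_iter_reset() in the diff below keeps the stale data in the buffer and simply skips every entry stamped before the start of the trace, counting how many were skipped. A minimal user-space sketch of that idea, using hypothetical names and a simplified entry type rather than the kernel ring-buffer API:

/*
 * Sketch: instead of clearing the buffer, remember the timestamp at which
 * the trace started and skip anything older when dumping.
 */
#include <stdio.h>
#include <stdint.h>

struct entry {
	uint64_t ts;		/* event timestamp */
	const char *msg;	/* event payload */
};

/* Print only the entries recorded at or after time_start. */
static void dump_since(const struct entry *buf, int nr, uint64_t time_start)
{
	int skipped = 0;

	for (int i = 0; i < nr; i++) {
		if (buf[i].ts < time_start) {
			skipped++;	/* stale data from before the trace */
			continue;
		}
		printf("%llu: %s\n",
		       (unsigned long long)buf[i].ts, buf[i].msg);
	}
	printf("# skipped %d entries older than %llu\n",
	       skipped, (unsigned long long)time_start);
}

int main(void)
{
	struct entry buf[] = {
		{ 100, "old event" },
		{ 205, "irqs off"  },
		{ 210, "irqs on"   },
	};

	/* Pretend the latency trace started at timestamp 200. */
	dump_since(buf, 3, 200);
	return 0;
}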
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/trace.c               80
-rw-r--r--  kernel/trace/trace.h                1
-rw-r--r--  kernel/trace/trace_irqsoff.c        3
-rw-r--r--  kernel/trace/trace_sched_wakeup.c   7
4 files changed, 67 insertions, 24 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 54517a889791..7daf372e319a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -454,10 +454,6 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tr->buffer = max_tr.buffer;
 	max_tr.buffer = buf;
 
-	ftrace_disable_cpu();
-	ring_buffer_reset(tr->buffer);
-	ftrace_enable_cpu();
-
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
 }
@@ -483,7 +479,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	ftrace_disable_cpu();
 
-	ring_buffer_reset(max_tr.buffer);
 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
 	ftrace_enable_cpu();
@@ -1374,6 +1369,37 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
 	return ent;
 }
 
+static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
+{
+	struct trace_array *tr = iter->tr;
+	struct ring_buffer_event *event;
+	struct ring_buffer_iter *buf_iter;
+	unsigned long entries = 0;
+	u64 ts;
+
+	tr->data[cpu]->skipped_entries = 0;
+
+	if (!iter->buffer_iter[cpu])
+		return;
+
+	buf_iter = iter->buffer_iter[cpu];
+	ring_buffer_iter_reset(buf_iter);
+
+	/*
+	 * We could have the case with the max latency tracers
+	 * that a reset never took place on a cpu. This is evident
+	 * by the timestamp being before the start of the buffer.
+	 */
+	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
+		if (ts >= iter->tr->time_start)
+			break;
+		entries++;
+		ring_buffer_read(buf_iter, NULL);
+	}
+
+	tr->data[cpu]->skipped_entries = entries;
+}
+
 /*
  * No necessary locking here. The worst thing which can
  * happen is loosing events consumed at the same time
@@ -1412,10 +1438,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 
 	if (cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu)
-			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
+			tracing_iter_reset(iter, cpu);
 	} else
-		ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
-
+		tracing_iter_reset(iter, cpu_file);
 
 	ftrace_enable_cpu();
 
@@ -1464,16 +1489,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	struct trace_array *tr = iter->tr;
 	struct trace_array_cpu *data = tr->data[tr->cpu];
 	struct tracer *type = current_trace;
-	unsigned long total;
-	unsigned long entries;
+	unsigned long entries = 0;
+	unsigned long total = 0;
+	unsigned long count;
 	const char *name = "preemption";
+	int cpu;
 
 	if (type)
 		name = type->name;
 
-	entries = ring_buffer_entries(iter->tr->buffer);
-	total = entries +
-		ring_buffer_overruns(iter->tr->buffer);
+
+	for_each_tracing_cpu(cpu) {
+		count = ring_buffer_entries_cpu(tr->buffer, cpu);
+		/*
+		 * If this buffer has skipped entries, then we hold all
+		 * entries for the trace and we need to ignore the
+		 * ones before the time stamp.
+		 */
+		if (tr->data[cpu]->skipped_entries) {
+			count -= tr->data[cpu]->skipped_entries;
+			/* total is the same as the entries */
+			total += count;
+		} else
+			total += count +
+				ring_buffer_overrun_cpu(tr->buffer, cpu);
+		entries += count;
+	}
 
 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
 		   name, UTS_RELEASE);
@@ -1534,6 +1575,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 	if (cpumask_test_cpu(iter->cpu, iter->started))
 		return;
 
+	if (iter->tr->data[iter->cpu]->skipped_entries)
+		return;
+
 	cpumask_set_cpu(iter->cpu, iter->started);
 
 	/* Don't print started cpu buffer for the first entry of the trace */
@@ -1796,19 +1840,23 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (ring_buffer_overruns(iter->tr->buffer))
 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
+	/* stop the trace while dumping */
+	tracing_stop();
+
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
 
 			iter->buffer_iter[cpu] =
 				ring_buffer_read_start(iter->tr->buffer, cpu);
+			tracing_iter_reset(iter, cpu);
 		}
 	} else {
 		cpu = iter->cpu_file;
 		iter->buffer_iter[cpu] =
 			ring_buffer_read_start(iter->tr->buffer, cpu);
+		tracing_iter_reset(iter, cpu);
 	}
 
-	/* TODO stop tracer */
 	ret = seq_open(file, &tracer_seq_ops);
 	if (ret < 0) {
 		fail_ret = ERR_PTR(ret);
@@ -1818,9 +1866,6 @@ __tracing_open(struct inode *inode, struct file *file)
 	m = file->private_data;
 	m->private = iter;
 
-	/* stop the trace while dumping */
-	tracing_stop();
-
 	mutex_unlock(&trace_types_lock);
 
 	return iter;
@@ -1831,6 +1876,7 @@ __tracing_open(struct inode *inode, struct file *file)
 		ring_buffer_read_finish(iter->buffer_iter[cpu]);
 	}
 	free_cpumask_var(iter->started);
+	tracing_start();
 fail:
 	mutex_unlock(&trace_types_lock);
 	kfree(iter->trace);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f2af713a8bcc..ca070de36227 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -241,6 +241,7 @@ struct trace_array_cpu {
 	unsigned long		nice;
 	unsigned long		policy;
 	unsigned long		rt_priority;
+	unsigned long		skipped_entries;
 	cycle_t			preempt_timestamp;
 	pid_t			pid;
 	uid_t			uid;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b923d13e2fad..5555b75a0d12 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -178,7 +178,6 @@ out_unlock:
 out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
-	tracing_reset(tr, cpu);
 	trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
@@ -208,7 +207,6 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
 	data->critical_start = parent_ip ? : ip;
-	tracing_reset(tr, cpu);
 
 	local_save_flags(flags);
 
@@ -379,6 +377,7 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
 	irqsoff_trace = tr;
 	/* make sure that the tracer is visible */
 	smp_wmb();
+	tracing_reset_online_cpus(tr);
 	start_irqsoff_tracer(tr);
 }
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index eacb27225173..ad69f105a7c6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -186,11 +186,6 @@ out:
 
 static void __wakeup_reset(struct trace_array *tr)
 {
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		tracing_reset(tr, cpu);
-
 	wakeup_cpu = -1;
 	wakeup_prio = -1;
 
@@ -204,6 +199,8 @@ static void wakeup_reset(struct trace_array *tr)
 {
 	unsigned long flags;
 
+	tracing_reset_online_cpus(tr);
+
 	local_irq_save(flags);
 	__raw_spin_lock(&wakeup_lock);
 	__wakeup_reset(tr);