author     Rabin Vincent <rabin@rab.in>           2014-11-10 13:46:34 -0500
committer  Steven Rostedt <rostedt@goodmis.org>   2014-11-10 16:45:43 -0500
commit     e30f53aad2202b5526c40c36d8eeac8bf290bde5
tree       f9b21ea9ccbe93b781d8426af09cbd472cbdfef8
parent     206c5f60a3d902bc4b56dab2de3e88de5eb06108

tracing: Do not busy wait in buffer splice
On a !PREEMPT kernel, attempting to use trace-cmd results in a soft
lockup:
# trace-cmd record -e raw_syscalls:* -F false
NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61]
...
Call Trace:
[<ffffffff8105b580>] ? __wake_up_common+0x90/0x90
[<ffffffff81092e25>] wait_on_pipe+0x35/0x40
[<ffffffff810936e3>] tracing_buffers_splice_read+0x2e3/0x3c0
[<ffffffff81093300>] ? tracing_stats_read+0x2a0/0x2a0
[<ffffffff812d10ab>] ? _raw_spin_unlock+0x2b/0x40
[<ffffffff810dc87b>] ? do_read_fault+0x21b/0x290
[<ffffffff810de56a>] ? handle_mm_fault+0x2ba/0xbd0
[<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80
[<ffffffff810951e2>] ? trace_buffer_lock_reserve+0x22/0x60
[<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80
[<ffffffff8112415d>] do_splice_to+0x6d/0x90
[<ffffffff81126971>] SyS_splice+0x7c1/0x800
[<ffffffff812d1edd>] tracesys_phase2+0xd3/0xd8
The problem is this: tracing_buffers_splice_read() calls
ring_buffer_wait() to wait for data in the ring buffers. The buffers
are not empty so ring_buffer_wait() returns immediately. But
tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1,
meaning it only wants to read a full page. When the full page is not
available, tracing_buffers_splice_read() tries to wait again with
ring_buffer_wait(), which again returns immediately, and so on.
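For illustration, the spinning loop looks roughly like this (a simplified
sketch of the splice path described above, not the literal kernel source;
names and arguments are abbreviated):

	for (;;) {
		/* full=1: only succeed once a whole page is available */
		r = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 1);
		if (r >= 0)
			break;		/* got a full page to splice */

		/* Old interface: returns as soon as the buffer is non-empty.
		 * The buffer already holds (partial) data, so this never
		 * sleeps, and on a !PREEMPT kernel the loop spins until the
		 * soft lockup detector fires.
		 */
		ring_buffer_wait(buffer, cpu);
	}
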
Fix this by adding a "full" argument to ring_buffer_wait(): when set, it
waits until the writer has left the reader's page, i.e. until a full-page
read will succeed.
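The callers in kernel/trace/trace.c then pick the behaviour they need;
roughly (simplified from the hunks below):

	/* tracing_wait_pipe() and tracing_buffers_read() can make progress
	 * with partial data, so any data is enough:
	 */
	ret = wait_on_pipe(iter, false);

	/* tracing_buffers_splice_read() hands whole pages to the pipe, so it
	 * must wait for a full page; a pending signal now surfaces as -EINTR
	 * returned by ring_buffer_wait() instead of a separate
	 * signal_pending() check in each caller:
	 */
	ret = wait_on_pipe(iter, true);
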
Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in
Cc: stable@vger.kernel.org # 3.16+
Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function")
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
 include/linux/ring_buffer.h |  2
 kernel/trace/ring_buffer.c  | 81
 kernel/trace/trace.c        | 23
 3 files changed, 62 insertions(+), 44 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 49a4d6f59108..e2c13cd863bd 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2d75c94ae87d..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
+ * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 {
-	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
 	DEFINE_WAIT(wait);
 	struct rb_irq_work *work;
+	int ret = 0;
 
 	/*
 	 * Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 	}
 
 
-	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+	while (true) {
+		prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-	/*
-	 * The events can happen in critical sections where
-	 * checking a work queue can cause deadlocks.
-	 * After adding a task to the queue, this flag is set
-	 * only to notify events to try to wake up the queue
-	 * using irq_work.
-	 *
-	 * We don't clear it even if the buffer is no longer
-	 * empty. The flag only causes the next event to run
-	 * irq_work to do the work queue wake up. The worse
-	 * that can happen if we race with !trace_empty() is that
-	 * an event will cause an irq_work to try to wake up
-	 * an empty queue.
-	 *
-	 * There's no reason to protect this flag either, as
-	 * the work queue and irq_work logic will do the necessary
-	 * synchronization for the wake ups. The only thing
-	 * that is necessary is that the wake up happens after
-	 * a task has been queued. It's OK for spurious wake ups.
-	 */
-	work->waiters_pending = true;
+		/*
+		 * The events can happen in critical sections where
+		 * checking a work queue can cause deadlocks.
+		 * After adding a task to the queue, this flag is set
+		 * only to notify events to try to wake up the queue
+		 * using irq_work.
+		 *
+		 * We don't clear it even if the buffer is no longer
+		 * empty. The flag only causes the next event to run
+		 * irq_work to do the work queue wake up. The worse
+		 * that can happen if we race with !trace_empty() is that
+		 * an event will cause an irq_work to try to wake up
+		 * an empty queue.
+		 *
+		 * There's no reason to protect this flag either, as
+		 * the work queue and irq_work logic will do the necessary
+		 * synchronization for the wake ups. The only thing
+		 * that is necessary is that the wake up happens after
+		 * a task has been queued. It's OK for spurious wake ups.
+		 */
+		work->waiters_pending = true;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
+			break;
+
+		if (cpu != RING_BUFFER_ALL_CPUS &&
+		    !ring_buffer_empty_cpu(buffer, cpu)) {
+			unsigned long flags;
+			bool pagebusy;
+
+			if (!full)
+				break;
+
+			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+			if (!pagebusy)
+				break;
+		}
 
-	if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
-	    (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
 		schedule();
+	}
 
 	finish_wait(&work->waiters, &wait);
-	return 0;
+
+	return ret;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a528392b1f4..15209335888d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static int wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter, bool full)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
 		return 0;
 
-	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
+				full);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, false);
 
 		mutex_lock(&iter->mutex);
 
 		if (ret)
 			return ret;
-
-		if (signal_pending(current))
-			return -EINTR;
 	}
 
 	return 1;
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			ret = wait_on_pipe(iter);
+			ret = wait_on_pipe(iter, false);
 			mutex_lock(&trace_types_lock);
 			if (ret) {
 				size = ret;
 				goto out_unlock;
 			}
-			if (signal_pending(current)) {
-				size = -EINTR;
-				goto out_unlock;
-			}
 			goto again;
 		}
 		size = 0;
@@ -5587,14 +5581,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, true);
 		mutex_lock(&trace_types_lock);
 		if (ret)
 			goto out;
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			goto out;
-		}
+
 		goto again;
 	}
 