author		Rabin Vincent <rabin@rab.in>	2014-11-10 13:46:34 -0500
committer	Steven Rostedt <rostedt@goodmis.org>	2014-11-10 16:45:43 -0500
commit		e30f53aad2202b5526c40c36d8eeac8bf290bde5
tree		f9b21ea9ccbe93b781d8426af09cbd472cbdfef8
parent		206c5f60a3d902bc4b56dab2de3e88de5eb06108
tracing: Do not busy wait in buffer splice
On a !PREEMPT kernel, attempting to use trace-cmd results in a soft
lockup:

 # trace-cmd record -e raw_syscalls:* -F false
 NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61]
 ...
 Call Trace:
  [<ffffffff8105b580>] ? __wake_up_common+0x90/0x90
  [<ffffffff81092e25>] wait_on_pipe+0x35/0x40
  [<ffffffff810936e3>] tracing_buffers_splice_read+0x2e3/0x3c0
  [<ffffffff81093300>] ? tracing_stats_read+0x2a0/0x2a0
  [<ffffffff812d10ab>] ? _raw_spin_unlock+0x2b/0x40
  [<ffffffff810dc87b>] ? do_read_fault+0x21b/0x290
  [<ffffffff810de56a>] ? handle_mm_fault+0x2ba/0xbd0
  [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80
  [<ffffffff810951e2>] ? trace_buffer_lock_reserve+0x22/0x60
  [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80
  [<ffffffff8112415d>] do_splice_to+0x6d/0x90
  [<ffffffff81126971>] SyS_splice+0x7c1/0x800
  [<ffffffff812d1edd>] tracesys_phase2+0xd3/0xd8

The problem is this: tracing_buffers_splice_read() calls
ring_buffer_wait() to wait for data in the ring buffers. The buffers
are not empty, so ring_buffer_wait() returns immediately. But
tracing_buffers_splice_read() calls ring_buffer_read_page() with
full=1, meaning it only wants to read a full page. When the full page
is not available, tracing_buffers_splice_read() tries to wait again
with ring_buffer_wait(), which again returns immediately, and so on.

Fix this by adding a "full" argument to ring_buffer_wait() which will
make ring_buffer_wait() wait until the writer has left the reader's
page, i.e. until full-page reads will succeed.

Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in

Cc: stable@vger.kernel.org # 3.16+
Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function")
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
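The failure reduces to a wait whose wake-up condition ("buffer is
non-empty") is weaker than what the caller needs ("a full page is
ready"). Below is a minimal, self-contained userspace sketch of that
mismatch; all names in it (model_wait, model_wait_full, PAGE_EVENTS,
buffered_events) are invented for illustration, and none of it is
kernel code.

/*
 * Minimal userspace model of the bug; not kernel code.  All names
 * here are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_EVENTS 64           /* events that make up a "full" page */

static int buffered_events = 10; /* non-empty, but less than a page */

/* Old semantics: report "ready" as soon as any data exists. */
static bool model_wait(void)
{
        return buffered_events > 0; /* true here, so no sleep happens */
}

/* New semantics: with full set, also require a complete page. */
static bool model_wait_full(bool full)
{
        if (buffered_events == 0)
                return false;                   /* sleep: no data */
        if (full && buffered_events < PAGE_EVENTS)
                return false;                   /* sleep: page not full */
        return true;                            /* wake the reader */
}

int main(void)
{
        int spins = 0;

        /*
         * The splice path only consumes full pages.  With the old
         * wait this loop never blocks and never makes progress,
         * which on a !PREEMPT kernel shows up as a soft lockup.
         * (Capped at 5 iterations so the demo terminates.)
         */
        while (model_wait() && buffered_events < PAGE_EVENTS && spins < 5)
                spins++;
        printf("old wait: spun %d times without progress\n", spins);

        /* With full=true the reader would simply sleep instead. */
        if (!model_wait_full(true))
                printf("new wait: reader sleeps until a page is full\n");
        return 0;
}

Note that the fix adds no new wake-up source; it only strengthens the
condition the sleeper re-checks after each wake-up, as the diff below
shows.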
-rw-r--r--	include/linux/ring_buffer.h	2
-rw-r--r--	kernel/trace/ring_buffer.c	81
-rw-r--r--	kernel/trace/trace.c	23
3 files changed, 62 insertions(+), 44 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 49a4d6f59108..e2c13cd863bd 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2d75c94ae87d..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
+ * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 {
-	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
 	DEFINE_WAIT(wait);
 	struct rb_irq_work *work;
+	int ret = 0;
 
 	/*
 	 * Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 	}
 
 
-	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+	while (true) {
+		prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-	/*
-	 * The events can happen in critical sections where
-	 * checking a work queue can cause deadlocks.
-	 * After adding a task to the queue, this flag is set
-	 * only to notify events to try to wake up the queue
-	 * using irq_work.
-	 *
-	 * We don't clear it even if the buffer is no longer
-	 * empty. The flag only causes the next event to run
-	 * irq_work to do the work queue wake up. The worse
-	 * that can happen if we race with !trace_empty() is that
-	 * an event will cause an irq_work to try to wake up
-	 * an empty queue.
-	 *
-	 * There's no reason to protect this flag either, as
-	 * the work queue and irq_work logic will do the necessary
-	 * synchronization for the wake ups. The only thing
-	 * that is necessary is that the wake up happens after
-	 * a task has been queued. It's OK for spurious wake ups.
-	 */
-	work->waiters_pending = true;
+		/*
+		 * The events can happen in critical sections where
+		 * checking a work queue can cause deadlocks.
+		 * After adding a task to the queue, this flag is set
+		 * only to notify events to try to wake up the queue
+		 * using irq_work.
+		 *
+		 * We don't clear it even if the buffer is no longer
+		 * empty. The flag only causes the next event to run
+		 * irq_work to do the work queue wake up. The worse
+		 * that can happen if we race with !trace_empty() is that
+		 * an event will cause an irq_work to try to wake up
+		 * an empty queue.
+		 *
+		 * There's no reason to protect this flag either, as
+		 * the work queue and irq_work logic will do the necessary
+		 * synchronization for the wake ups. The only thing
+		 * that is necessary is that the wake up happens after
+		 * a task has been queued. It's OK for spurious wake ups.
+		 */
+		work->waiters_pending = true;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
+			break;
+
+		if (cpu != RING_BUFFER_ALL_CPUS &&
+		    !ring_buffer_empty_cpu(buffer, cpu)) {
+			unsigned long flags;
+			bool pagebusy;
+
+			if (!full)
+				break;
+
+			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+			if (!pagebusy)
+				break;
+		}
 
-	if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
-	    (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
 		schedule();
+	}
 
 	finish_wait(&work->waiters, &wait);
-	return 0;
+
+	return ret;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a528392b1f4..15209335888d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static int wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter, bool full)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
 		return 0;
 
-	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
+				full);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, false);
 
 		mutex_lock(&iter->mutex);
 
 		if (ret)
 			return ret;
-
-		if (signal_pending(current))
-			return -EINTR;
 	}
 
 	return 1;
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			ret = wait_on_pipe(iter);
+			ret = wait_on_pipe(iter, false);
 			mutex_lock(&trace_types_lock);
 			if (ret) {
 				size = ret;
 				goto out_unlock;
 			}
-			if (signal_pending(current)) {
-				size = -EINTR;
-				goto out_unlock;
-			}
 			goto again;
 		}
 		size = 0;
@@ -5587,14 +5581,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, true);
 		mutex_lock(&trace_types_lock);
 		if (ret)
 			goto out;
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			goto out;
-		}
+
 		goto again;
 	}
 
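For the new wait condition itself, the patch treats "the writer is
still committing to the reader's page" as the sign that a full-page
read cannot yet succeed. Here is a hedged userspace sketch of that
test; struct model_cpu_buffer and its fields are illustrative
stand-ins, not the kernel's ring_buffer_per_cpu.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the relevant ring_buffer_per_cpu fields. */
struct model_cpu_buffer {
        int reader_page;  /* page the reader would splice out */
        int commit_page;  /* page the writer is committing to */
};

/*
 * Mirrors the pagebusy test added to ring_buffer_wait(): while the
 * writer's commit page is still the reader page, a full-page read
 * cannot succeed, so a full=true waiter keeps sleeping.
 */
static bool model_page_busy(const struct model_cpu_buffer *cb)
{
        return cb->reader_page == cb->commit_page;
}

int main(void)
{
        struct model_cpu_buffer cb = { .reader_page = 0, .commit_page = 0 };

        printf("writer on reader page -> %s\n",
               model_page_busy(&cb) ? "keep waiting" : "wake up");

        cb.commit_page = 1; /* writer moved on; the page is complete */
        printf("writer moved on       -> %s\n",
               model_page_busy(&cb) ? "keep waiting" : "wake up");
        return 0;
}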