 -rw-r--r--  include/linux/ring_buffer.h |   6
 -rw-r--r--  kernel/trace/ring_buffer.c  | 146
 -rw-r--r--  kernel/trace/trace.c        |  83
 3 files changed, 164 insertions(+), 71 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1342e69542f3..d69cf637a15a 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -4,6 +4,7 @@
 #include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
+#include <linux/poll.h>
 
 struct ring_buffer;
 struct ring_buffer_iter;
@@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
        __ring_buffer_alloc((size), (flags), &__key);  \
 })
 
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+                          struct file *filp, poll_table *poll_table);
+
+
 #define RING_BUFFER_ALL_CPUS -1
 
 void ring_buffer_free(struct ring_buffer *buffer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7244acde77b0..56b6ea32d2e7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -8,6 +8,7 @@
 #include <linux/trace_clock.h>
 #include <linux/trace_seq.h>
 #include <linux/spinlock.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
@@ -442,6 +443,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
        return ret;
 }
 
+struct rb_irq_work {
+        struct irq_work         work;
+        wait_queue_head_t       waiters;
+        bool                    waiters_pending;
+};
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -476,6 +483,8 @@ struct ring_buffer_per_cpu {
        struct list_head        new_pages; /* new pages to add */
        struct work_struct      update_pages_work;
        struct completion       update_done;
+
+        struct rb_irq_work      irq_work;
 };
 
 struct ring_buffer {
@@ -495,6 +504,8 @@ struct ring_buffer {
        struct notifier_block   cpu_notify;
 #endif
        u64                     (*clock)(void);
+
+        struct rb_irq_work      irq_work;
 };
 
 struct ring_buffer_iter {
@@ -506,6 +517,118 @@ struct ring_buffer_iter {
        u64                     read_stamp;
 };
 
+/*
+ * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * ring buffer waiters queue.
+ */
+static void rb_wake_up_waiters(struct irq_work *work)
+{
+        struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
+        wake_up_all(&rbwork->waiters);
+}
+
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ */
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        DEFINE_WAIT(wait);
+        struct rb_irq_work *work;
+
+        /*
+         * Depending on what the caller is waiting for, either any
+         * data in any cpu buffer, or a specific buffer, put the
+         * caller on the appropriate wait queue.
+         */
+        if (cpu == RING_BUFFER_ALL_CPUS)
+                work = &buffer->irq_work;
+        else {
+                cpu_buffer = buffer->buffers[cpu];
+                work = &cpu_buffer->irq_work;
+        }
+
+
+        prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+
+        /*
+         * The events can happen in critical sections where
+         * checking a work queue can cause deadlocks.
+         * After adding a task to the queue, this flag is set
+         * only to notify events to try to wake up the queue
+         * using irq_work.
+         *
+         * We don't clear it even if the buffer is no longer
+         * empty. The flag only causes the next event to run
+         * irq_work to do the work queue wake up. The worst
+         * that can happen if we race with !trace_empty() is that
+         * an event will cause an irq_work to try to wake up
+         * an empty queue.
+         *
+         * There's no reason to protect this flag either, as
+         * the work queue and irq_work logic will do the necessary
+         * synchronization for the wake ups. The only thing
+         * that is necessary is that the wake up happens after
+         * a task has been queued. It's OK for spurious wake ups.
+         */
+        work->waiters_pending = true;
+
+        if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
+            (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
+                schedule();
+
+        finish_wait(&work->waiters, &wait);
+}
+
+/**
+ * ring_buffer_poll_wait - poll on buffer input
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @filp: the file descriptor
+ * @poll_table: The poll descriptor
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ *
+ * Returns POLLIN | POLLRDNORM if data exists in the buffers,
+ * zero otherwise.
+ */
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+                          struct file *filp, poll_table *poll_table)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        struct rb_irq_work *work;
+
+        if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+            (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+                return POLLIN | POLLRDNORM;
+
+        if (cpu == RING_BUFFER_ALL_CPUS)
+                work = &buffer->irq_work;
+        else {
+                cpu_buffer = buffer->buffers[cpu];
+                work = &cpu_buffer->irq_work;
+        }
+
+        work->waiters_pending = true;
+        poll_wait(filp, &work->waiters, poll_table);
+
+        if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+            (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+                return POLLIN | POLLRDNORM;
+        return 0;
+}
+
 /* buffer may be either ring_buffer or ring_buffer_per_cpu */
 #define RB_WARN_ON(b, cond)                                             \
         ({                                                              \
@@ -1061,6 +1184,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
        cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
        init_completion(&cpu_buffer->update_done);
+        init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
 
        bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                             GFP_KERNEL, cpu_to_node(cpu));
@@ -1156,6 +1280,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
        buffer->clock = trace_clock_local;
        buffer->reader_lock_key = key;
 
+        init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
+
        /* need at least two pages */
        if (nr_pages < 2)
                nr_pages = 2;
@@ -2610,6 +2736,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
        rb_end_commit(cpu_buffer);
 }
 
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+        if (buffer->irq_work.waiters_pending) {
+                buffer->irq_work.waiters_pending = false;
+                /* irq_work_queue() supplies its own memory barriers */
+                irq_work_queue(&buffer->irq_work.work);
+        }
+
+        if (cpu_buffer->irq_work.waiters_pending) {
+                cpu_buffer->irq_work.waiters_pending = false;
+                /* irq_work_queue() supplies its own memory barriers */
+                irq_work_queue(&cpu_buffer->irq_work.work);
+        }
+}
+
 /**
  * ring_buffer_unlock_commit - commit a reserved
  * @buffer: The buffer to commit to
@@ -2629,6 +2771,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        rb_commit(cpu_buffer, event);
 
+        rb_wakeups(buffer, cpu_buffer);
+
        trace_recursive_unlock();
 
        preempt_enable_notrace();
@@ -2801,6 +2945,8 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
        rb_commit(cpu_buffer, event);
 
+        rb_wakeups(buffer, cpu_buffer);
+
        ret = 0;
  out:
        preempt_enable_notrace();
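Stripped of the ring-buffer details, the reader/writer handshake that ring_buffer_wait(), ring_buffer_poll_wait() and rb_wakeups() implement above looks roughly like the sketch below. It is not part of the patch; it only illustrates why the writer defers the wake up: as the comment in ring_buffer_wait() says, events can fire in critical sections where touching the waitqueue directly could deadlock, so the commit path merely queues an irq_work and rb_wake_up_waiters() performs the actual wake up later.

/* Condensed, illustrative sketch of the handshake (not part of the patch). */
static void reader_side(struct rb_irq_work *work, bool buffer_empty)
{
        DEFINE_WAIT(wait);

        prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
        /* Tell the next writer that somebody is waiting; never cleared here. */
        work->waiters_pending = true;
        if (buffer_empty)
                schedule();
        finish_wait(&work->waiters, &wait);
}

static void writer_side(struct rb_irq_work *work)
{
        /*
         * Only queue an irq_work from the commit path; rb_wake_up_waiters()
         * runs later and calls wake_up_all() on the waiters queue.
         */
        if (work->waiters_pending) {
                work->waiters_pending = false;
                irq_work_queue(&work->work);
        }
}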
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3ec146c96df4..b5b25b6575a9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,7 +19,6 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
-#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -87,14 +86,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
 /*
- * When a reader is waiting for data, then this variable is
- * set to true.
- */
-static bool trace_wakeup_needed;
-
-static struct irq_work trace_work_wakeup;
-
-/*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
  * of the tracer is successful. But that is the only place that sets
@@ -334,9 +325,6 @@ static inline void trace_access_lock_init(void)
 
 #endif
 
-/* trace_wait is a waitqueue for tasks blocked on trace_poll */
-static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
@@ -344,19 +332,6 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
 
 /**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-static void trace_wake_up(struct irq_work *work)
-{
-        wake_up_all(&trace_wait);
-
-}
-
-/**
  * tracing_on - enable tracing buffers
  *
  * This function enables tracing buffers that may have been
@@ -763,36 +738,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 static void default_wait_pipe(struct trace_iterator *iter)
 {
-        DEFINE_WAIT(wait);
-
-        prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-        /*
-         * The events can happen in critical sections where
-         * checking a work queue can cause deadlocks.
-         * After adding a task to the queue, this flag is set
-         * only to notify events to try to wake up the queue
-         * using irq_work.
-         *
-         * We don't clear it even if the buffer is no longer
-         * empty. The flag only causes the next event to run
-         * irq_work to do the work queue wake up. The worse
-         * that can happen if we race with !trace_empty() is that
-         * an event will cause an irq_work to try to wake up
-         * an empty queue.
-         *
-         * There's no reason to protect this flag either, as
-         * the work queue and irq_work logic will do the necessary
-         * synchronization for the wake ups. The only thing
-         * that is necessary is that the wake up happens after
-         * a task has been queued. It's OK for spurious wake ups.
-         */
-        trace_wakeup_needed = true;
-
-        if (trace_empty(iter))
-                schedule();
+        /* Iterators are static, they should be filled or empty */
+        if (trace_buffer_iter(iter, iter->cpu_file))
+                return;
 
-        finish_wait(&trace_wait, &wait);
+        ring_buffer_wait(iter->tr->buffer, iter->cpu_file);
 }
 
 /**
@@ -1262,11 +1212,6 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
        __this_cpu_write(trace_cmdline_save, true);
-        if (trace_wakeup_needed) {
-                trace_wakeup_needed = false;
-                /* irq_work_queue() supplies it's own memory barriers */
-                irq_work_queue(&trace_work_wakeup);
-        }
        ring_buffer_unlock_commit(buffer, event);
 }
 
@@ -3557,21 +3502,18 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 static unsigned int
 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
 {
-        if (trace_flags & TRACE_ITER_BLOCK) {
+        /* Iterators are static, they should be filled or empty */
+        if (trace_buffer_iter(iter, iter->cpu_file))
+                return POLLIN | POLLRDNORM;
+
+        if (trace_flags & TRACE_ITER_BLOCK)
                /*
                 * Always select as readable when in blocking mode
                 */
                return POLLIN | POLLRDNORM;
-        } else {
-                if (!trace_empty(iter))
-                        return POLLIN | POLLRDNORM;
-                trace_wakeup_needed = true;
-                poll_wait(filp, &trace_wait, poll_table);
-                if (!trace_empty(iter))
-                        return POLLIN | POLLRDNORM;
-
-                return 0;
-        }
+        else
+                return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file,
+                                             filp, poll_table);
 }
 
 static unsigned int
@@ -5701,7 +5643,6 @@ __init static int tracer_alloc_buffers(void)
 #endif
 
        trace_init_cmdlines();
-        init_irq_work(&trace_work_wakeup, trace_wake_up);
 
        register_tracer(&nop_trace);
 
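From userspace, the visible effect of routing trace_poll() and default_wait_pipe() through the ring buffer is that a reader of trace_pipe (or one of the per_cpu trace_pipe files) can sleep in poll(2) and be woken only when events are actually written. A small illustrative reader, not part of the patch and assuming the usual debugfs mount point:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        struct pollfd pfd;

        pfd.fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
        if (pfd.fd < 0) {
                perror("open");
                return 1;
        }
        pfd.events = POLLIN;

        /* Sleep until the kernel reports POLLIN | POLLRDNORM, then drain. */
        while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
                ssize_t n = read(pfd.fd, buf, sizeof(buf));
                if (n <= 0)
                        break;
                fwrite(buf, 1, (size_t)n, stdout);
        }
        close(pfd.fd);
        return 0;
}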