author     Steven Rostedt (Red Hat) <rostedt@goodmis.org>   2013-02-28 19:59:17 -0500
committer  Steven Rostedt <rostedt@goodmis.org>             2013-03-15 00:34:50 -0400
commit     15693458c4bc0693fd63a50d60f35b628fcf4e29 (patch)
tree       9ce7c378ec85134b84852b474e301e135187c0c0 /kernel/trace
parent     b627344fef0c38fa4e3050348e168e46db87c905 (diff)
tracing/ring-buffer: Move poll wake ups into ring buffer code
Move the logic to wake up on ring buffer data into the ring buffer code itself. This simplifies the tracing code a lot and also has the added benefit that waiters on one of the instance buffers can be woken only when data is added to that instance instead of data added to any instance.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
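For illustration only (not part of this commit): with this change a reader can block on one instance's buffer and be woken only when data is committed there. The sketch below is hypothetical and assumes the matching ring_buffer_wait() prototype lands in include/linux/ring_buffer.h (not shown here, since the diffstat is limited to kernel/trace).

/* Hypothetical reader-side sketch, not part of the patch. */
static void example_wait_for_cpu_data(struct ring_buffer *buffer, int cpu)
{
	/*
	 * Sleeps until an event is committed to this cpu's buffer;
	 * pass RING_BUFFER_ALL_CPUS to be woken by data on any cpu.
	 */
	ring_buffer_wait(buffer, cpu);
}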
Diffstat (limited to 'kernel/trace')
-rw-r--r--   kernel/trace/ring_buffer.c   146
-rw-r--r--   kernel/trace/trace.c          83
2 files changed, 158 insertions(+), 71 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7244acde77b0..56b6ea32d2e7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -8,6 +8,7 @@
 #include <linux/trace_clock.h>
 #include <linux/trace_seq.h>
 #include <linux/spinlock.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
@@ -442,6 +443,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 	return ret;
 }
 
+struct rb_irq_work {
+	struct irq_work			work;
+	wait_queue_head_t		waiters;
+	bool				waiters_pending;
+};
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -476,6 +483,8 @@ struct ring_buffer_per_cpu {
 	struct list_head		new_pages; /* new pages to add */
 	struct work_struct		update_pages_work;
 	struct completion		update_done;
+
+	struct rb_irq_work		irq_work;
 };
 
 struct ring_buffer {
@@ -495,6 +504,8 @@ struct ring_buffer {
 	struct notifier_block		cpu_notify;
 #endif
 	u64				(*clock)(void);
+
+	struct rb_irq_work		irq_work;
 };
 
 struct ring_buffer_iter {
@@ -506,6 +517,118 @@ struct ring_buffer_iter {
 	u64				read_stamp;
 };
 
+/*
+ * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * ring buffer waiters queue.
+ */
+static void rb_wake_up_waiters(struct irq_work *work)
+{
+	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
+	wake_up_all(&rbwork->waiters);
+}
+
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ */
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	DEFINE_WAIT(wait);
+	struct rb_irq_work *work;
+
+	/*
+	 * Depending on what the caller is waiting for, either any
+	 * data in any cpu buffer, or a specific buffer, put the
+	 * caller on the appropriate wait queue.
+	 */
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		work = &buffer->irq_work;
+	else {
+		cpu_buffer = buffer->buffers[cpu];
+		work = &cpu_buffer->irq_work;
+	}
+
+
+	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	work->waiters_pending = true;
+
+	if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
+	    (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
+		schedule();
+
+	finish_wait(&work->waiters, &wait);
+}
+
+/**
+ * ring_buffer_poll_wait - poll on buffer input
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @filp: the file descriptor
+ * @poll_table: The poll descriptor
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ *
+ * Returns POLLIN | POLLRDNORM if data exists in the buffers,
+ * zero otherwise.
+ */
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+			  struct file *filp, poll_table *poll_table)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct rb_irq_work *work;
+
+	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+		return POLLIN | POLLRDNORM;
+
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		work = &buffer->irq_work;
+	else {
+		cpu_buffer = buffer->buffers[cpu];
+		work = &cpu_buffer->irq_work;
+	}
+
+	work->waiters_pending = true;
+	poll_wait(filp, &work->waiters, poll_table);
+
+	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
 /* buffer may be either ring_buffer or ring_buffer_per_cpu */
 #define RB_WARN_ON(b, cond)						\
 	({								\
@@ -1061,6 +1184,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
 	init_completion(&cpu_buffer->update_done);
+	init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
@@ -1156,6 +1280,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	buffer->clock = trace_clock_local;
 	buffer->reader_lock_key = key;
 
+	init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
+
 	/* need at least two pages */
 	if (nr_pages < 2)
 		nr_pages = 2;
@@ -2610,6 +2736,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_end_commit(cpu_buffer);
 }
 
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+	if (buffer->irq_work.waiters_pending) {
+		buffer->irq_work.waiters_pending = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&buffer->irq_work.work);
+	}
+
+	if (cpu_buffer->irq_work.waiters_pending) {
+		cpu_buffer->irq_work.waiters_pending = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&cpu_buffer->irq_work.work);
+	}
+}
+
 /**
  * ring_buffer_unlock_commit - commit a reserved
  * @buffer: The buffer to commit to
@@ -2629,6 +2771,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	rb_wakeups(buffer, cpu_buffer);
+
 	trace_recursive_unlock();
 
 	preempt_enable_notrace();
@@ -2801,6 +2945,8 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	rb_wakeups(buffer, cpu_buffer);
+
 	ret = 0;
  out:
 	preempt_enable_notrace();
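For reference, the waiter/producer handshake added above can be read as a standalone pattern. The sketch below is simplified from the hunks above (the RING_BUFFER_ALL_CPUS handling and the empty-buffer checks are collapsed into a caller-supplied flag); it is illustrative only, not a drop-in piece of the patch, and the names prefixed with sketch_ are made up.

/*
 * Minimal sketch of the handshake this patch introduces.
 */
#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>

struct sketch_irq_work {
	struct irq_work		work;
	wait_queue_head_t	waiters;
	bool			waiters_pending;
};

/* irq_work handler: runs in a context where waking tasks is safe */
static void sketch_wake_waiters(struct irq_work *work)
{
	struct sketch_irq_work *rbwork =
		container_of(work, struct sketch_irq_work, work);

	wake_up_all(&rbwork->waiters);
}

static void sketch_init(struct sketch_irq_work *rbwork)
{
	init_waitqueue_head(&rbwork->waiters);
	init_irq_work(&rbwork->work, sketch_wake_waiters);
	rbwork->waiters_pending = false;
}

/* reader side: process context, blocks until a producer signals data */
static void sketch_wait_for_data(struct sketch_irq_work *rbwork, bool buffer_empty)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&rbwork->waiters, &wait, TASK_INTERRUPTIBLE);
	/* set only after the task is queued; spurious wake ups are fine */
	rbwork->waiters_pending = true;
	if (buffer_empty)
		schedule();
	finish_wait(&rbwork->waiters, &wait);
}

/* producer side: may run in irq/NMI context, so only queue irq_work */
static void sketch_signal_data(struct sketch_irq_work *rbwork)
{
	if (rbwork->waiters_pending) {
		rbwork->waiters_pending = false;
		irq_work_queue(&rbwork->work);
	}
}

The producer side only queues irq_work because commits can happen in irq or NMI context, where calling wake_up_all() directly could deadlock on the wait queue lock; the actual wake up runs later from the irq_work handler.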
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3ec146c96df4..b5b25b6575a9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,7 +19,6 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
-#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -87,14 +86,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
 /*
- * When a reader is waiting for data, then this variable is
- * set to true.
- */
-static bool trace_wakeup_needed;
-
-static struct irq_work trace_work_wakeup;
-
-/*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
  * of the tracer is successful. But that is the only place that sets
@@ -334,9 +325,6 @@ static inline void trace_access_lock_init(void)
 
 #endif
 
-/* trace_wait is a waitqueue for tasks blocked on trace_poll */
-static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
@@ -344,19 +332,6 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 	TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
 
 /**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-static void trace_wake_up(struct irq_work *work)
-{
-	wake_up_all(&trace_wait);
-
-}
-
-/**
  * tracing_on - enable tracing buffers
  *
  * This function enables tracing buffers that may have been
@@ -763,36 +738,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 static void default_wait_pipe(struct trace_iterator *iter)
 {
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-	/*
-	 * The events can happen in critical sections where
-	 * checking a work queue can cause deadlocks.
-	 * After adding a task to the queue, this flag is set
-	 * only to notify events to try to wake up the queue
-	 * using irq_work.
-	 *
-	 * We don't clear it even if the buffer is no longer
-	 * empty. The flag only causes the next event to run
-	 * irq_work to do the work queue wake up. The worse
-	 * that can happen if we race with !trace_empty() is that
-	 * an event will cause an irq_work to try to wake up
-	 * an empty queue.
-	 *
-	 * There's no reason to protect this flag either, as
-	 * the work queue and irq_work logic will do the necessary
-	 * synchronization for the wake ups. The only thing
-	 * that is necessary is that the wake up happens after
-	 * a task has been queued. It's OK for spurious wake ups.
-	 */
-	trace_wakeup_needed = true;
-
-	if (trace_empty(iter))
-		schedule();
+	/* Iterators are static, they should be filled or empty */
+	if (trace_buffer_iter(iter, iter->cpu_file))
+		return;
 
-	finish_wait(&trace_wait, &wait);
+	ring_buffer_wait(iter->tr->buffer, iter->cpu_file);
 }
 
 /**
@@ -1262,11 +1212,6 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
-	if (trace_wakeup_needed) {
-		trace_wakeup_needed = false;
-		/* irq_work_queue() supplies it's own memory barriers */
-		irq_work_queue(&trace_work_wakeup);
-	}
 	ring_buffer_unlock_commit(buffer, event);
 }
 
@@ -3557,21 +3502,18 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 static unsigned int
 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
 {
-	if (trace_flags & TRACE_ITER_BLOCK) {
+	/* Iterators are static, they should be filled or empty */
+	if (trace_buffer_iter(iter, iter->cpu_file))
+		return POLLIN | POLLRDNORM;
+
+	if (trace_flags & TRACE_ITER_BLOCK)
 		/*
 		 * Always select as readable when in blocking mode
 		 */
 		return POLLIN | POLLRDNORM;
-	} else {
-		if (!trace_empty(iter))
-			return POLLIN | POLLRDNORM;
-		trace_wakeup_needed = true;
-		poll_wait(filp, &trace_wait, poll_table);
-		if (!trace_empty(iter))
-			return POLLIN | POLLRDNORM;
-
-		return 0;
-	}
+	else
+		return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file,
+					     filp, poll_table);
 }
 
 static unsigned int
@@ -5701,7 +5643,6 @@ __init static int tracer_alloc_buffers(void)
 #endif
 
 	trace_init_cmdlines();
-	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
 
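For illustration (not part of the commit): with trace_poll() now delegating to the ring buffer, other trace files, including per-instance buffers, can implement poll the same way. A hypothetical .poll handler, assuming filp->private_data holds the trace_iterator as in trace.c; the name example_trace_poll is made up:

/* Hypothetical poll handler sketch; names are illustrative only. */
static unsigned int example_trace_poll(struct file *filp, poll_table *pt)
{
	struct trace_iterator *iter = filp->private_data;

	/* Reports POLLIN | POLLRDNORM once the chosen buffer has data. */
	return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file,
				     filp, pt);
}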