-rw-r--r-- | include/linux/ring_buffer.h |   6
-rw-r--r-- | kernel/trace/ring_buffer.c  | 146
-rw-r--r-- | kernel/trace/trace.c        |  83
3 files changed, 164 insertions(+), 71 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1342e69542f3..d69cf637a15a 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -4,6 +4,7 @@
 #include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
+#include <linux/poll.h>
 
 struct ring_buffer;
 struct ring_buffer_iter;
@@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+			  struct file *filp, poll_table *poll_table);
+
+
 #define RING_BUFFER_ALL_CPUS -1
 
 void ring_buffer_free(struct ring_buffer *buffer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7244acde77b0..56b6ea32d2e7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -8,6 +8,7 @@
 #include <linux/trace_clock.h>
 #include <linux/trace_seq.h>
 #include <linux/spinlock.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
@@ -442,6 +443,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 	return ret;
 }
 
+struct rb_irq_work {
+	struct irq_work			work;
+	wait_queue_head_t		waiters;
+	bool				waiters_pending;
+};
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -476,6 +483,8 @@ struct ring_buffer_per_cpu {
 	struct list_head		new_pages; /* new pages to add */
 	struct work_struct		update_pages_work;
 	struct completion		update_done;
+
+	struct rb_irq_work		irq_work;
 };
 
 struct ring_buffer {
@@ -495,6 +504,8 @@ struct ring_buffer {
 	struct notifier_block		cpu_notify;
 #endif
 	u64				(*clock)(void);
+
+	struct rb_irq_work		irq_work;
 };
 
 struct ring_buffer_iter {
@@ -506,6 +517,118 @@ struct ring_buffer_iter {
 	u64				read_stamp;
 };
 
+/*
+ * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * ring buffer waiters queue.
+ */
+static void rb_wake_up_waiters(struct irq_work *work)
+{
+	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
+	wake_up_all(&rbwork->waiters);
+}
+
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ */
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	DEFINE_WAIT(wait);
+	struct rb_irq_work *work;
+
+	/*
+	 * Depending on what the caller is waiting for, either any
+	 * data in any cpu buffer, or a specific buffer, put the
+	 * caller on the appropriate wait queue.
+	 */
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		work = &buffer->irq_work;
+	else {
+		cpu_buffer = buffer->buffers[cpu];
+		work = &cpu_buffer->irq_work;
+	}
+
+
+	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	work->waiters_pending = true;
+
+	if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
+	    (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
+		schedule();
+
+	finish_wait(&work->waiters, &wait);
+}
+
+/**
+ * ring_buffer_poll_wait - poll on buffer input
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @filp: the file descriptor
+ * @poll_table: The poll descriptor
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ *
+ * Returns POLLIN | POLLRDNORM if data exists in the buffers,
+ * zero otherwise.
+ */
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+			  struct file *filp, poll_table *poll_table)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct rb_irq_work *work;
+
+	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+		return POLLIN | POLLRDNORM;
+
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		work = &buffer->irq_work;
+	else {
+		cpu_buffer = buffer->buffers[cpu];
+		work = &cpu_buffer->irq_work;
+	}
+
+	work->waiters_pending = true;
+	poll_wait(filp, &work->waiters, poll_table);
+
+	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
 /* buffer may be either ring_buffer or ring_buffer_per_cpu */
 #define RB_WARN_ON(b, cond)						\
 	({								\
@@ -1061,6 +1184,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
 	init_completion(&cpu_buffer->update_done);
+	init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
@@ -1156,6 +1280,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	buffer->clock = trace_clock_local;
 	buffer->reader_lock_key = key;
 
+	init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
+
 	/* need at least two pages */
 	if (nr_pages < 2)
 		nr_pages = 2;
@@ -2610,6 +2736,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_end_commit(cpu_buffer);
 }
 
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+	if (buffer->irq_work.waiters_pending) {
+		buffer->irq_work.waiters_pending = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&buffer->irq_work.work);
+	}
+
+	if (cpu_buffer->irq_work.waiters_pending) {
+		cpu_buffer->irq_work.waiters_pending = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&cpu_buffer->irq_work.work);
+	}
+}
+
 /**
  * ring_buffer_unlock_commit - commit a reserved
  * @buffer: The buffer to commit to
@@ -2629,6 +2771,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	rb_wakeups(buffer, cpu_buffer);
+
 	trace_recursive_unlock();
 
 	preempt_enable_notrace();
@@ -2801,6 +2945,8 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	rb_wakeups(buffer, cpu_buffer);
+
 	ret = 0;
  out:
 	preempt_enable_notrace();
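
The rb_wakeups()/ring_buffer_wait() pair above is a deferred wake-up handshake: the reader sets waiters_pending before checking whether the buffer is empty, and the writer's commit path never calls wake_up() directly (as the comment in ring_buffer_wait() explains, an event can fire in a critical section where that could deadlock); it only clears the flag and queues an irq_work whose callback does the real wake-up from a safe context. A minimal stand-alone sketch of that pattern, illustrative only, with made-up demo_* names rather than anything from this patch:

/* Illustrative sketch -- all demo_* identifiers are hypothetical. */
#include <linux/irq_work.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waiters);
static bool demo_waiters_pending;
static struct irq_work demo_wakeup_work;

/* irq_work callback: runs in a safe interrupt context, so waking the queue is fine */
static void demo_do_wakeup(struct irq_work *work)
{
	wake_up_all(&demo_waiters);
}

/* call once at init, mirroring __ring_buffer_alloc() above:
 *	init_irq_work(&demo_wakeup_work, demo_do_wakeup);
 */

/* writer hot path: may run from any context, so never call wake_up() here */
static inline void demo_writer_commit(void)
{
	if (demo_waiters_pending) {
		demo_waiters_pending = false;
		irq_work_queue(&demo_wakeup_work);	/* supplies its own barriers */
	}
}

/* reader side: queue itself first, set the flag, then check for data */
static void demo_reader_wait(bool buffer_empty)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&demo_waiters, &wait, TASK_INTERRUPTIBLE);
	demo_waiters_pending = true;	/* must happen before the empty check */
	if (buffer_empty)
		schedule();
	finish_wait(&demo_waiters, &wait);
}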
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3ec146c96df4..b5b25b6575a9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,7 +19,6 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
-#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -87,14 +86,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
 /*
- * When a reader is waiting for data, then this variable is
- * set to true.
- */
-static bool trace_wakeup_needed;
-
-static struct irq_work trace_work_wakeup;
-
-/*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
@@ -334,9 +325,6 @@ static inline void trace_access_lock_init(void)
 
 #endif
 
-/* trace_wait is a waitqueue for tasks blocked on trace_poll */
-static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
@@ -344,19 +332,6 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 	TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
 
 /**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-static void trace_wake_up(struct irq_work *work)
-{
-	wake_up_all(&trace_wait);
-
-}
-
-/**
  * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
@@ -763,36 +738,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 static void default_wait_pipe(struct trace_iterator *iter)
 {
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-	/*
-	 * The events can happen in critical sections where
-	 * checking a work queue can cause deadlocks.
-	 * After adding a task to the queue, this flag is set
-	 * only to notify events to try to wake up the queue
-	 * using irq_work.
-	 *
-	 * We don't clear it even if the buffer is no longer
-	 * empty. The flag only causes the next event to run
-	 * irq_work to do the work queue wake up. The worse
-	 * that can happen if we race with !trace_empty() is that
-	 * an event will cause an irq_work to try to wake up
-	 * an empty queue.
-	 *
-	 * There's no reason to protect this flag either, as
-	 * the work queue and irq_work logic will do the necessary
-	 * synchronization for the wake ups. The only thing
-	 * that is necessary is that the wake up happens after
-	 * a task has been queued. It's OK for spurious wake ups.
-	 */
-	trace_wakeup_needed = true;
-
-	if (trace_empty(iter))
-		schedule();
+	/* Iterators are static, they should be filled or empty */
+	if (trace_buffer_iter(iter, iter->cpu_file))
+		return;
 
-	finish_wait(&trace_wait, &wait);
+	ring_buffer_wait(iter->tr->buffer, iter->cpu_file);
 }
 
 /**
@@ -1262,11 +1212,6 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
-	if (trace_wakeup_needed) {
-		trace_wakeup_needed = false;
-		/* irq_work_queue() supplies it's own memory barriers */
-		irq_work_queue(&trace_work_wakeup);
-	}
 	ring_buffer_unlock_commit(buffer, event);
 }
 
@@ -3557,21 +3502,18 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 static unsigned int
 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
 {
-	if (trace_flags & TRACE_ITER_BLOCK) {
+	/* Iterators are static, they should be filled or empty */
+	if (trace_buffer_iter(iter, iter->cpu_file))
+		return POLLIN | POLLRDNORM;
+
+	if (trace_flags & TRACE_ITER_BLOCK)
 		/*
 		 * Always select as readable when in blocking mode
 		 */
 		return POLLIN | POLLRDNORM;
-	} else {
-		if (!trace_empty(iter))
-			return POLLIN | POLLRDNORM;
-		trace_wakeup_needed = true;
-		poll_wait(filp, &trace_wait, poll_table);
-		if (!trace_empty(iter))
-			return POLLIN | POLLRDNORM;
-
-		return 0;
-	}
+	else
+		return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file,
+					     filp, poll_table);
 }
 
 static unsigned int
@@ -5701,7 +5643,6 @@ __init static int tracer_alloc_buffers(void)
 #endif
 
 	trace_init_cmdlines();
-	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
 
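
With this change, any ring-buffer consumer can block or poll without trace.c's private trace_wait queue; trace_poll() and default_wait_pipe() above are the real callers. As a rough illustration of how another consumer could be wired to the two new exports (a sketch only; the my_* names and the per-cpu-reader assumption are made up, not part of this patch):

/* Illustrative sketch -- my_* identifiers are hypothetical. */
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ring_buffer.h>
#include <linux/sched.h>

struct my_reader {
	struct ring_buffer	*buffer;
	int			cpu_file;	/* a CPU id, or RING_BUFFER_ALL_CPUS */
};

/* ->poll: ring_buffer_poll_wait() registers on the right rb_irq_work waitqueue
 * and returns POLLIN | POLLRDNORM itself when data is already available.
 */
static unsigned int my_poll(struct file *filp, poll_table *pt)
{
	struct my_reader *r = filp->private_data;

	return ring_buffer_poll_wait(r->buffer, r->cpu_file, filp, pt);
}

/* blocking read path: sleep until the commit path's rb_wakeups() fires.
 * This helper assumes r->cpu_file is a real CPU id (per-cpu reader).
 */
static int my_wait_for_data(struct my_reader *r)
{
	while (ring_buffer_empty_cpu(r->buffer, r->cpu_file)) {
		if (signal_pending(current))
			return -EINTR;
		ring_buffer_wait(r->buffer, r->cpu_file);
	}
	return 0;
}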