Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  128
1 file changed, 83 insertions(+), 45 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 925f629658d6..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
+ * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 {
-        struct ring_buffer_per_cpu *cpu_buffer;
+        struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
         DEFINE_WAIT(wait);
         struct rb_irq_work *work;
+        int ret = 0;
 
         /*
          * Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
         }
 
 
-        prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+        while (true) {
+                prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-        /*
-         * The events can happen in critical sections where
-         * checking a work queue can cause deadlocks.
-         * After adding a task to the queue, this flag is set
-         * only to notify events to try to wake up the queue
-         * using irq_work.
-         *
-         * We don't clear it even if the buffer is no longer
-         * empty. The flag only causes the next event to run
-         * irq_work to do the work queue wake up. The worse
-         * that can happen if we race with !trace_empty() is that
-         * an event will cause an irq_work to try to wake up
-         * an empty queue.
-         *
-         * There's no reason to protect this flag either, as
-         * the work queue and irq_work logic will do the necessary
-         * synchronization for the wake ups. The only thing
-         * that is necessary is that the wake up happens after
-         * a task has been queued. It's OK for spurious wake ups.
-         */
-        work->waiters_pending = true;
+                /*
+                 * The events can happen in critical sections where
+                 * checking a work queue can cause deadlocks.
+                 * After adding a task to the queue, this flag is set
+                 * only to notify events to try to wake up the queue
+                 * using irq_work.
+                 *
+                 * We don't clear it even if the buffer is no longer
+                 * empty. The flag only causes the next event to run
+                 * irq_work to do the work queue wake up. The worse
+                 * that can happen if we race with !trace_empty() is that
+                 * an event will cause an irq_work to try to wake up
+                 * an empty queue.
+                 *
+                 * There's no reason to protect this flag either, as
+                 * the work queue and irq_work logic will do the necessary
+                 * synchronization for the wake ups. The only thing
+                 * that is necessary is that the wake up happens after
+                 * a task has been queued. It's OK for spurious wake ups.
+                 */
+                work->waiters_pending = true;
+
+                if (signal_pending(current)) {
+                        ret = -EINTR;
+                        break;
+                }
+
+                if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
+                        break;
+
+                if (cpu != RING_BUFFER_ALL_CPUS &&
+                    !ring_buffer_empty_cpu(buffer, cpu)) {
+                        unsigned long flags;
+                        bool pagebusy;
+
+                        if (!full)
+                                break;
+
+                        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+                        pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+                        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+                        if (!pagebusy)
+                                break;
+                }
 
-        if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
-            (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
                 schedule();
+        }
 
         finish_wait(&work->waiters, &wait);
-        return 0;
+
+        return ret;
 }
 
 /**
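
With the new @full flag, a reader can sleep until the reader page fills instead of busy-waiting. A minimal caller sketch (the helper name here is hypothetical and for illustration only; the in-tree user is the trace_pipe splice path):

        /* Hypothetical helper: sleep until a full page of events is ready. */
        static int wait_for_full_page(struct ring_buffer *buffer, int cpu)
        {
                /* @full = true: wake only once the reader page is complete. */
                int ret = ring_buffer_wait(buffer, cpu, true);

                /* ring_buffer_wait() now returns -EINTR if a signal arrived. */
                return ret;
        }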
@@ -626,8 +653,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
                 work = &cpu_buffer->irq_work;
         }
 
-        work->waiters_pending = true;
         poll_wait(filp, &work->waiters, poll_table);
+        work->waiters_pending = true;
+        /*
+         * There's a tight race between setting the waiters_pending and
+         * checking if the ring buffer is empty. Once the waiters_pending bit
+         * is set, the next event will wake the task up, but we can get stuck
+         * if there's only a single event in.
+         *
+         * FIXME: Ideally, we need a memory barrier on the writer side as well,
+         * but adding a memory barrier to all events will cause too much of a
+         * performance hit in the fast path. We only need a memory barrier when
+         * the buffer goes from empty to having content. But as this race is
+         * extremely small, and it's not a problem if another event comes in, we
+         * will fix it later.
+         */
+        smp_mb();
 
         if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
             (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
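
The ordering the new barrier enforces can be sketched as follows (an illustrative interleaving, not code from the patch; the writer column shows the idealized pairing the FIXME deliberately leaves out):

        /*
         * Poll side (this function):         Writer side (event commit):
         *   waiters_pending = true;            <store event data>
         *   smp_mb();                          if (waiters_pending)
         *   if (!ring_buffer_empty(...))               irq_work_queue(...);
         *           return POLLIN | POLLRDNORM;
         *
         * Without the smp_mb(), the CPU may hoist the empty check above the
         * waiters_pending store; a single event landing between the two
         * could then never wake the poller.
         */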
@@ -1968,7 +2009,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
 
 /**
  * rb_update_event - update event type and data
- * @event: the even to update
+ * @event: the event to update
  * @type: the type of event
  * @length: the size of the event field in the ring buffer
  *
@@ -3341,21 +3382,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
         /* Iterator usage is expected to have record disabled */
-        if (list_empty(&cpu_buffer->reader_page->list)) {
-                iter->head_page = rb_set_head_page(cpu_buffer);
-                if (unlikely(!iter->head_page))
-                        return;
-                iter->head = iter->head_page->read;
-        } else {
-                iter->head_page = cpu_buffer->reader_page;
-                iter->head = cpu_buffer->reader_page->read;
-        }
+        iter->head_page = cpu_buffer->reader_page;
+        iter->head = cpu_buffer->reader_page->read;
+
+        iter->cache_reader_page = iter->head_page;
+        iter->cache_read = cpu_buffer->read;
+
         if (iter->head)
                 iter->read_stamp = cpu_buffer->read_stamp;
         else
                 iter->read_stamp = iter->head_page->page->time_stamp;
-        iter->cache_reader_page = cpu_buffer->reader_page;
-        iter->cache_read = cpu_buffer->read;
 }
 
 /**
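
For orientation, rb_iter_reset() runs when an iterator is started or reset; the usual driving pattern around it looks roughly like this (a consumer sketch against the 3.x-era iterator API, shown for illustration and not part of this patch):

        struct ring_buffer_iter *iter;
        struct ring_buffer_event *event;
        u64 ts;

        iter = ring_buffer_read_prepare(buffer, cpu);
        ring_buffer_read_prepare_sync();
        ring_buffer_read_start(iter);   /* resets the iterator via rb_iter_reset() */

        while ((event = ring_buffer_iter_peek(iter, &ts))) {
                /* consume the event, then advance past it */
                ring_buffer_read(iter, &ts);
        }
        ring_buffer_read_finish(iter);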
@@ -3748,12 +3784,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
                 return NULL;
 
         /*
-         * We repeat when a time extend is encountered.
-         * Since the time extend is always attached to a data event,
-         * we should never loop more than once.
-         * (We never hit the following condition more than twice).
+         * We repeat when a time extend is encountered or we hit
+         * the end of the page. Since the time extend is always attached
+         * to a data event, we should never loop more than three times.
+         * Once for going to next page, once on time extend, and
+         * finally once to get the event.
+         * (We never hit the following condition more than thrice).
          */
-        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
+        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
                 return NULL;
 
         if (rb_per_cpu_empty(cpu_buffer))
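
The raised bound follows from the worst-case walk through rb_iter_peek(), which can be spelled out (an illustrative trace, assuming a time-extended event sits just past a page boundary):

        /*
         * Pass 1: iterator sits at the end of its page   -> advance to the
         *         next page, loop again.
         * Pass 2: next event is RINGBUF_TYPE_TIME_EXTEND -> fold the delta
         *         into *ts, advance, loop again.
         * Pass 3: a real data event                      -> returned to caller.
         * A fourth pass would indicate a corrupted buffer, hence
         * RB_WARN_ON(cpu_buffer, ++nr_loops > 3).
         */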