Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c | 128
1 file changed, 83 insertions, 45 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 925f629658d6..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
+ * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 {
-	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
 	DEFINE_WAIT(wait);
 	struct rb_irq_work *work;
+	int ret = 0;
 
 	/*
 	 * Depending on what the caller is waiting for, either any
| @@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu) | |||
| 564 | } | 566 | } |
| 565 | 567 | ||
| 566 | 568 | ||
| 567 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | 569 | while (true) { |
| 570 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | ||
| 568 | 571 | ||
| 569 | /* | 572 | /* |
| 570 | * The events can happen in critical sections where | 573 | * The events can happen in critical sections where |
| 571 | * checking a work queue can cause deadlocks. | 574 | * checking a work queue can cause deadlocks. |
| 572 | * After adding a task to the queue, this flag is set | 575 | * After adding a task to the queue, this flag is set |
| 573 | * only to notify events to try to wake up the queue | 576 | * only to notify events to try to wake up the queue |
| 574 | * using irq_work. | 577 | * using irq_work. |
| 575 | * | 578 | * |
| 576 | * We don't clear it even if the buffer is no longer | 579 | * We don't clear it even if the buffer is no longer |
| 577 | * empty. The flag only causes the next event to run | 580 | * empty. The flag only causes the next event to run |
| 578 | * irq_work to do the work queue wake up. The worse | 581 | * irq_work to do the work queue wake up. The worse |
| 579 | * that can happen if we race with !trace_empty() is that | 582 | * that can happen if we race with !trace_empty() is that |
| 580 | * an event will cause an irq_work to try to wake up | 583 | * an event will cause an irq_work to try to wake up |
| 581 | * an empty queue. | 584 | * an empty queue. |
| 582 | * | 585 | * |
| 583 | * There's no reason to protect this flag either, as | 586 | * There's no reason to protect this flag either, as |
| 584 | * the work queue and irq_work logic will do the necessary | 587 | * the work queue and irq_work logic will do the necessary |
| 585 | * synchronization for the wake ups. The only thing | 588 | * synchronization for the wake ups. The only thing |
| 586 | * that is necessary is that the wake up happens after | 589 | * that is necessary is that the wake up happens after |
| 587 | * a task has been queued. It's OK for spurious wake ups. | 590 | * a task has been queued. It's OK for spurious wake ups. |
| 588 | */ | 591 | */ |
| 589 | work->waiters_pending = true; | 592 | work->waiters_pending = true; |
| 593 | |||
| 594 | if (signal_pending(current)) { | ||
| 595 | ret = -EINTR; | ||
| 596 | break; | ||
| 597 | } | ||
| 598 | |||
| 599 | if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) | ||
| 600 | break; | ||
| 601 | |||
| 602 | if (cpu != RING_BUFFER_ALL_CPUS && | ||
| 603 | !ring_buffer_empty_cpu(buffer, cpu)) { | ||
| 604 | unsigned long flags; | ||
| 605 | bool pagebusy; | ||
| 606 | |||
| 607 | if (!full) | ||
| 608 | break; | ||
| 609 | |||
| 610 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | ||
| 611 | pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; | ||
| 612 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | ||
| 613 | |||
| 614 | if (!pagebusy) | ||
| 615 | break; | ||
| 616 | } | ||
| 590 | 617 | ||
| 591 | if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || | ||
| 592 | (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) | ||
| 593 | schedule(); | 618 | schedule(); |
| 619 | } | ||
| 594 | 620 | ||
| 595 | finish_wait(&work->waiters, &wait); | 621 | finish_wait(&work->waiters, &wait); |
| 596 | return 0; | 622 | |
| 623 | return ret; | ||
| 597 | } | 624 | } |
| 598 | 625 | ||
| 599 | /** | 626 | /** |
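Note on the hunk above: ring_buffer_wait() gains a @full argument and can now return -EINTR when interrupted by a signal. As a minimal sketch (the wrapper name and error handling below are illustrative assumptions, not code from this diff), a reader-side caller of the new three-argument form might look like:

/* Illustrative caller only; not code from this commit. */
static int example_wait_for_trace_data(struct ring_buffer *buffer, int cpu)
{
	/*
	 * Ask for a full page when waiting on one CPU; @full is only
	 * honoured when @cpu != RING_BUFFER_ALL_CPUS (see the kerneldoc
	 * added in the first hunk).
	 */
	int ret = ring_buffer_wait(buffer, cpu, cpu != RING_BUFFER_ALL_CPUS);

	if (ret == -EINTR)	/* woken by a signal, not by data */
		return ret;

	return 0;	/* data (or a full page) is available */
}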
| @@ -626,8 +653,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, | |||
| 626 | work = &cpu_buffer->irq_work; | 653 | work = &cpu_buffer->irq_work; |
| 627 | } | 654 | } |
| 628 | 655 | ||
| 629 | work->waiters_pending = true; | ||
| 630 | poll_wait(filp, &work->waiters, poll_table); | 656 | poll_wait(filp, &work->waiters, poll_table); |
| 657 | work->waiters_pending = true; | ||
| 658 | /* | ||
| 659 | * There's a tight race between setting the waiters_pending and | ||
| 660 | * checking if the ring buffer is empty. Once the waiters_pending bit | ||
| 661 | * is set, the next event will wake the task up, but we can get stuck | ||
| 662 | * if there's only a single event in. | ||
| 663 | * | ||
| 664 | * FIXME: Ideally, we need a memory barrier on the writer side as well, | ||
| 665 | * but adding a memory barrier to all events will cause too much of a | ||
| 666 | * performance hit in the fast path. We only need a memory barrier when | ||
| 667 | * the buffer goes from empty to having content. But as this race is | ||
| 668 | * extremely small, and it's not a problem if another event comes in, we | ||
| 669 | * will fix it later. | ||
| 670 | */ | ||
| 671 | smp_mb(); | ||
| 631 | 672 | ||
| 632 | if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || | 673 | if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || |
| 633 | (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) | 674 | (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) |
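The comment added above is about ordering: the poller must publish waiters_pending before re-checking whether the buffer is empty, otherwise a writer that slips the only event in between can skip the wakeup. A rough user-space analogue of that pattern (a sketch only, not kernel code; atomic_thread_fence() stands in for smp_mb() and a condition variable for the waitqueue/irq_work machinery):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool waiters_pending;
static atomic_int  items;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;

/* Writer side: make data visible, then wake waiters if any were pending. */
static void *producer(void *arg)
{
	(void)arg;
	atomic_fetch_add(&items, 1);
	if (atomic_load(&waiters_pending)) {
		atomic_store(&waiters_pending, false);
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&cond);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

/* Reader side: publish the flag, full fence, then re-check for data. */
static void consumer_wait(void)
{
	pthread_mutex_lock(&lock);
	atomic_store(&waiters_pending, true);
	atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() analogue */
	while (atomic_load(&items) == 0)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	consumer_wait();
	pthread_join(t, NULL);
	printf("woke up with %d item(s)\n", atomic_load(&items));
	return 0;
}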
| @@ -1968,7 +2009,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | |||
| 1968 | 2009 | ||
| 1969 | /** | 2010 | /** |
| 1970 | * rb_update_event - update event type and data | 2011 | * rb_update_event - update event type and data |
| 1971 | * @event: the even to update | 2012 | * @event: the event to update |
| 1972 | * @type: the type of event | 2013 | * @type: the type of event |
| 1973 | * @length: the size of the event field in the ring buffer | 2014 | * @length: the size of the event field in the ring buffer |
| 1974 | * | 2015 | * |
| @@ -3341,21 +3382,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
| 3341 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3382 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
| 3342 | 3383 | ||
| 3343 | /* Iterator usage is expected to have record disabled */ | 3384 | /* Iterator usage is expected to have record disabled */ |
| 3344 | if (list_empty(&cpu_buffer->reader_page->list)) { | 3385 | iter->head_page = cpu_buffer->reader_page; |
| 3345 | iter->head_page = rb_set_head_page(cpu_buffer); | 3386 | iter->head = cpu_buffer->reader_page->read; |
| 3346 | if (unlikely(!iter->head_page)) | 3387 | |
| 3347 | return; | 3388 | iter->cache_reader_page = iter->head_page; |
| 3348 | iter->head = iter->head_page->read; | 3389 | iter->cache_read = cpu_buffer->read; |
| 3349 | } else { | 3390 | |
| 3350 | iter->head_page = cpu_buffer->reader_page; | ||
| 3351 | iter->head = cpu_buffer->reader_page->read; | ||
| 3352 | } | ||
| 3353 | if (iter->head) | 3391 | if (iter->head) |
| 3354 | iter->read_stamp = cpu_buffer->read_stamp; | 3392 | iter->read_stamp = cpu_buffer->read_stamp; |
| 3355 | else | 3393 | else |
| 3356 | iter->read_stamp = iter->head_page->page->time_stamp; | 3394 | iter->read_stamp = iter->head_page->page->time_stamp; |
| 3357 | iter->cache_reader_page = cpu_buffer->reader_page; | ||
| 3358 | iter->cache_read = cpu_buffer->read; | ||
| 3359 | } | 3395 | } |
| 3360 | 3396 | ||
| 3361 | /** | 3397 | /** |
| @@ -3748,12 +3784,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 3748 | return NULL; | 3784 | return NULL; |
| 3749 | 3785 | ||
| 3750 | /* | 3786 | /* |
| 3751 | * We repeat when a time extend is encountered. | 3787 | * We repeat when a time extend is encountered or we hit |
| 3752 | * Since the time extend is always attached to a data event, | 3788 | * the end of the page. Since the time extend is always attached |
| 3753 | * we should never loop more than once. | 3789 | * to a data event, we should never loop more than three times. |
| 3754 | * (We never hit the following condition more than twice). | 3790 | * Once for going to next page, once on time extend, and |
| 3791 | * finally once to get the event. | ||
| 3792 | * (We never hit the following condition more than thrice). | ||
| 3755 | */ | 3793 | */ |
| 3756 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) | 3794 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) |
| 3757 | return NULL; | 3795 | return NULL; |
| 3758 | 3796 | ||
| 3759 | if (rb_per_cpu_empty(cpu_buffer)) | 3797 | if (rb_per_cpu_empty(cpu_buffer)) |
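The reworked comment in the final hunk counts the reasons rb_iter_peek() may legitimately loop: once to move to the next page, once to skip a time-extend record, and once to read the event itself, hence the bound of three. A tiny standalone sketch of that bounded-retry shape (the enum, helper, and values are purely illustrative, not ring buffer code):

#include <stdio.h>

enum step { NEXT_PAGE, TIME_EXTEND, EVENT };

/* Each call consumes one legitimate retry reason before an event appears. */
static int read_one(enum step *state)
{
	switch (*state) {
	case NEXT_PAGE:   *state = TIME_EXTEND; return -1;	/* go to next page */
	case TIME_EXTEND: *state = EVENT;       return -1;	/* skip time extend */
	case EVENT:       return 42;				/* the event itself */
	}
	return -1;
}

int main(void)
{
	enum step state = NEXT_PAGE;
	int nr_loops = 0, ev = -1;

	while (ev < 0) {
		if (++nr_loops > 3) {	/* mirrors RB_WARN_ON(..., ++nr_loops > 3) */
			fprintf(stderr, "loop bound exceeded\n");
			return 1;
		}
		ev = read_one(&state);
	}
	printf("got event %d after %d iteration(s)\n", ev, nr_loops);
	return 0;
}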
