Diffstat (limited to 'kernel/trace/ring_buffer.c')
 -rw-r--r--  kernel/trace/ring_buffer.c | 1437
 1 file changed, 1090 insertions(+), 347 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..454e74e718cf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on); | |||
206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
207 | #define RB_ALIGNMENT 4U | 207 | #define RB_ALIGNMENT 4U |
208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | ||
209 | 210 | ||
210 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 211 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
211 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 212 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
@@ -217,17 +218,12 @@ enum { | |||
217 | 218 | ||
218 | static inline int rb_null_event(struct ring_buffer_event *event) | 219 | static inline int rb_null_event(struct ring_buffer_event *event) |
219 | { | 220 | { |
220 | return event->type_len == RINGBUF_TYPE_PADDING | 221 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
221 | && event->time_delta == 0; | ||
222 | } | ||
223 | |||
224 | static inline int rb_discarded_event(struct ring_buffer_event *event) | ||
225 | { | ||
226 | return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; | ||
227 | } | 222 | } |
228 | 223 | ||
229 | static void rb_event_set_padding(struct ring_buffer_event *event) | 224 | static void rb_event_set_padding(struct ring_buffer_event *event) |
230 | { | 225 | { |
226 | /* padding has a NULL time_delta */ | ||
231 | event->type_len = RINGBUF_TYPE_PADDING; | 227 | event->type_len = RINGBUF_TYPE_PADDING; |
232 | event->time_delta = 0; | 228 | event->time_delta = 0; |
233 | } | 229 | } |
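This hunk drops rb_discarded_event, but the convention it tested still shows up later in the patch: a PADDING event with a zero time_delta is plain end-of-page filler, while a non-zero time_delta marks a discarded event whose length is kept in array[0] (see rb_reset_tail further down). A minimal userspace sketch of that convention, with an assumed type value and illustrative names, not part of the patch:

#include <assert.h>
#include <stdint.h>

#define RINGBUF_TYPE_PADDING 29         /* assumed value of the padding type */

struct event_sketch {                   /* stand-in for struct ring_buffer_event */
        uint32_t type_len   : 5;
        uint32_t time_delta : 27;
        uint32_t array[1];
};

static int is_null_padding(const struct event_sketch *e)
{
        return e->type_len == RINGBUF_TYPE_PADDING && !e->time_delta;
}

int main(void)
{
        struct event_sketch discarded = {
                .type_len   = RINGBUF_TYPE_PADDING,
                .time_delta = 1,        /* non-zero: discarded, not null filler */
                .array      = { 60 },   /* discarded payload length lives here */
        };

        assert(!is_null_padding(&discarded));
        return 0;
}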
@@ -321,6 +317,14 @@ struct buffer_data_page { | |||
321 | unsigned char data[]; /* data of buffer page */ | 317 | unsigned char data[]; /* data of buffer page */ |
322 | }; | 318 | }; |
323 | 319 | ||
320 | /* | ||
321 | * Note, the buffer_page list must be first. The buffer pages | ||
322 | * are allocated in cache lines, which means that each buffer | ||
323 | * page will be at the beginning of a cache line, and thus | ||
324 | * the least significant bits will be zero. We use this to | ||
325 | * add flags in the list struct pointers, to make the ring buffer | ||
326 | * lockless. | ||
327 | */ | ||
324 | struct buffer_page { | 328 | struct buffer_page { |
325 | struct list_head list; /* list of buffer pages */ | 329 | struct list_head list; /* list of buffer pages */ |
326 | local_t write; /* index for next write */ | 330 | local_t write; /* index for next write */ |
@@ -329,6 +333,21 @@ struct buffer_page { | |||
329 | struct buffer_data_page *page; /* Actual data page */ | 333 | struct buffer_data_page *page; /* Actual data page */ |
330 | }; | 334 | }; |
331 | 335 | ||
336 | /* | ||
337 | * The buffer page counters, write and entries, must be reset | ||
338 | * atomically when crossing page boundaries. To synchronize this | ||
339 | * update, two counters are inserted into the number. One is | ||
340 | * the actual counter for the write position or count on the page. | ||
341 | * | ||
342 | * The other is a counter of updaters. Before an update happens | ||
343 | * the update partition of the counter is incremented. This will | ||
344 | * allow the updater to update the counter atomically. | ||
345 | * | ||
346 | * The counter is 20 bits, and the state data is 12. | ||
347 | */ | ||
348 | #define RB_WRITE_MASK 0xfffff | ||
349 | #define RB_WRITE_INTCNT (1 << 20) | ||
350 | |||
332 | static void rb_init_page(struct buffer_data_page *bpage) | 351 | static void rb_init_page(struct buffer_data_page *bpage) |
333 | { | 352 | { |
334 | local_set(&bpage->commit, 0); | 353 | local_set(&bpage->commit, 0); |
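The comment above fixes the layout of the packed write/entries fields: the low 20 bits hold the real index or count, and the bits above them count nested updaters. A quick, self-contained check of that split (illustrative only):

#include <assert.h>

#define RB_WRITE_MASK   0xfffffUL
#define RB_WRITE_INTCNT (1UL << 20)

int main(void)
{
        /* three nested updaters bumped the field while the write index is 100 */
        unsigned long val = 3 * RB_WRITE_INTCNT + 100;

        assert((val & RB_WRITE_MASK) == 100);   /* the write index */
        assert((val / RB_WRITE_INTCNT) == 3);   /* the updater count */
        return 0;
}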
@@ -402,19 +421,20 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
402 | struct ring_buffer_per_cpu { | 421 | struct ring_buffer_per_cpu { |
403 | int cpu; | 422 | int cpu; |
404 | struct ring_buffer *buffer; | 423 | struct ring_buffer *buffer; |
405 | spinlock_t reader_lock; /* serialize readers */ | 424 | spinlock_t reader_lock; /* serialize readers */ |
406 | raw_spinlock_t lock; | 425 | raw_spinlock_t lock; |
407 | struct lock_class_key lock_key; | 426 | struct lock_class_key lock_key; |
408 | struct list_head pages; | 427 | struct list_head *pages; |
409 | struct buffer_page *head_page; /* read from head */ | 428 | struct buffer_page *head_page; /* read from head */ |
410 | struct buffer_page *tail_page; /* write to tail */ | 429 | struct buffer_page *tail_page; /* write to tail */ |
411 | struct buffer_page *commit_page; /* committed pages */ | 430 | struct buffer_page *commit_page; /* committed pages */ |
412 | struct buffer_page *reader_page; | 431 | struct buffer_page *reader_page; |
413 | unsigned long nmi_dropped; | 432 | local_t commit_overrun; |
414 | unsigned long commit_overrun; | 433 | local_t overrun; |
415 | unsigned long overrun; | ||
416 | unsigned long read; | ||
417 | local_t entries; | 434 | local_t entries; |
435 | local_t committing; | ||
436 | local_t commits; | ||
437 | unsigned long read; | ||
418 | u64 write_stamp; | 438 | u64 write_stamp; |
419 | u64 read_stamp; | 439 | u64 read_stamp; |
420 | atomic_t record_disabled; | 440 | atomic_t record_disabled; |
@@ -447,14 +467,19 @@ struct ring_buffer_iter { | |||
447 | }; | 467 | }; |
448 | 468 | ||
449 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ | 469 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ |
450 | #define RB_WARN_ON(buffer, cond) \ | 470 | #define RB_WARN_ON(b, cond) \ |
451 | ({ \ | 471 | ({ \ |
452 | int _____ret = unlikely(cond); \ | 472 | int _____ret = unlikely(cond); \ |
453 | if (_____ret) { \ | 473 | if (_____ret) { \ |
454 | atomic_inc(&buffer->record_disabled); \ | 474 | if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ |
455 | WARN_ON(1); \ | 475 | struct ring_buffer_per_cpu *__b = \ |
456 | } \ | 476 | (void *)b; \ |
457 | _____ret; \ | 477 | atomic_inc(&__b->buffer->record_disabled); \ |
478 | } else \ | ||
479 | atomic_inc(&b->record_disabled); \ | ||
480 | WARN_ON(1); \ | ||
481 | } \ | ||
482 | _____ret; \ | ||
458 | }) | 483 | }) |
459 | 484 | ||
460 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 485 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
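The reworked RB_WARN_ON can be handed either a struct ring_buffer or a struct ring_buffer_per_cpu and still reaches the right record_disabled counter, because __same_type resolves at compile time. For reference, the kernel defines that helper as roughly a thin wrapper over a GCC builtin (illustrative; the real definition lives in the kernel's compiler headers):

#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))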
@@ -486,6 +511,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | |||
486 | } | 511 | } |
487 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | 512 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); |
488 | 513 | ||
514 | /* | ||
515 | * Making the ring buffer lockless makes things tricky. | ||
516 | * Although writes only happen on the CPU that they are on, | ||
517 | * and they only need to worry about interrupts. Reads can | ||
518 | * happen on any CPU. | ||
519 | * | ||
520 | * The reader page is always off the ring buffer, but when the | ||
521 | * reader finishes with a page, it needs to swap its page with | ||
522 | * a new one from the buffer. The reader needs to take from | ||
523 | * the head (writes go to the tail). But if a writer is in overwrite | ||
524 | * mode and wraps, it must push the head page forward. | ||
525 | * | ||
526 | * Here lies the problem. | ||
527 | * | ||
528 | * The reader must be careful to replace only the head page, and | ||
529 | * not another one. As described at the top of the file in the | ||
530 | * ASCII art, the reader sets its old page to point to the next | ||
531 | * page after head. It then sets the page after head to point to | ||
532 | * the old reader page. But if the writer moves the head page | ||
533 | * during this operation, the reader could end up with the tail. | ||
534 | * | ||
535 | * We use cmpxchg to help prevent this race. We also do something | ||
536 | * special with the page before head. We set the LSB to 1. | ||
537 | * | ||
538 | * When the writer must push the page forward, it will clear the | ||
539 | * bit that points to the head page, move the head, and then set | ||
540 | * the bit that points to the new head page. | ||
541 | * | ||
542 | * We also don't want an interrupt coming in and moving the head | ||
543 | * page on another writer. Thus we use the second LSB to catch | ||
544 | * that too. Thus: | ||
545 | * | ||
546 | * head->list->prev->next bit 1 bit 0 | ||
547 | * ------- ------- | ||
548 | * Normal page 0 0 | ||
549 | * Points to head page 0 1 | ||
550 | * New head page 1 0 | ||
551 | * | ||
552 | * Note we can not trust the prev pointer of the head page, because: | ||
553 | * | ||
554 | * +----+ +-----+ +-----+ | ||
555 | * | |------>| T |---X--->| N | | ||
556 | * | |<------| | | | | ||
557 | * +----+ +-----+ +-----+ | ||
558 | * ^ ^ | | ||
559 | * | +-----+ | | | ||
560 | * +----------| R |----------+ | | ||
561 | * | |<-----------+ | ||
562 | * +-----+ | ||
563 | * | ||
564 | * Key: ---X--> HEAD flag set in pointer | ||
565 | * T Tail page | ||
566 | * R Reader page | ||
567 | * N Next page | ||
568 | * | ||
569 | * (see __rb_reserve_next() to see where this happens) | ||
570 | * | ||
571 | * What the above shows is that the reader just swapped out | ||
572 | * the reader page with a page in the buffer, but before it | ||
573 | * could make the new header point back to the new page added | ||
574 | * it was preempted by a writer. The writer moved forward onto | ||
575 | * the new page added by the reader and is about to move forward | ||
576 | * again. | ||
577 | * | ||
578 | * You can see, it is legitimate for the previous pointer of | ||
579 | * the head (or any page) not to point back to itself. But only | ||
580 | * temporarially. | ||
581 | */ | ||
582 | |||
583 | #define RB_PAGE_NORMAL 0UL | ||
584 | #define RB_PAGE_HEAD 1UL | ||
585 | #define RB_PAGE_UPDATE 2UL | ||
586 | |||
587 | |||
588 | #define RB_FLAG_MASK 3UL | ||
589 | |||
590 | /* PAGE_MOVED is not part of the mask */ | ||
591 | #define RB_PAGE_MOVED 4UL | ||
592 | |||
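The table in the comment above is easier to see with a tiny standalone demo of the pointer-tagging trick: because buffer pages are cache-line aligned, the two low bits of a next pointer are free to carry the NORMAL/HEAD/UPDATE state, and the real pointer is still recoverable by masking. All names below are illustrative, not part of the patch:

#include <assert.h>
#include <stdint.h>

#define FLAG_MASK   3UL
#define FLAG_HEAD   1UL
#define FLAG_UPDATE 2UL

struct node { struct node *next; };

static struct node *tag(struct node *p, unsigned long flag)
{
        return (struct node *)((uintptr_t)p | flag);
}

static struct node *untag(struct node *p)
{
        return (struct node *)((uintptr_t)p & ~FLAG_MASK);
}

static unsigned long flags_of(struct node *p)
{
        return (uintptr_t)p & FLAG_MASK;
}

int main(void)
{
        static struct node page;                /* aligned, so its low bits start out zero */
        struct node *ptr = tag(&page, FLAG_HEAD);

        assert(untag(ptr) == &page);            /* the pointer survives */
        assert(flags_of(ptr) == FLAG_HEAD);     /* and so does the state */
        return 0;
}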
593 | /* | ||
594 | * rb_list_head - remove any bit | ||
595 | */ | ||
596 | static struct list_head *rb_list_head(struct list_head *list) | ||
597 | { | ||
598 | unsigned long val = (unsigned long)list; | ||
599 | |||
600 | return (struct list_head *)(val & ~RB_FLAG_MASK); | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * rb_is_head_page - test if the give page is the head page | ||
605 | * | ||
606 | * Because the reader may move the head_page pointer, we can | ||
607 | * not trust what the head page is (it may be pointing to | ||
608 | * the reader page). But if the next page is a header page, | ||
609 | * its flags will be non zero. | ||
610 | */ | ||
611 | static int inline | ||
612 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
613 | struct buffer_page *page, struct list_head *list) | ||
614 | { | ||
615 | unsigned long val; | ||
616 | |||
617 | val = (unsigned long)list->next; | ||
618 | |||
619 | if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) | ||
620 | return RB_PAGE_MOVED; | ||
621 | |||
622 | return val & RB_FLAG_MASK; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * rb_is_reader_page | ||
627 | * | ||
628 | * The unique thing about the reader page, is that, if the | ||
629 | * writer is ever on it, the previous pointer never points | ||
630 | * back to the reader page. | ||
631 | */ | ||
632 | static int rb_is_reader_page(struct buffer_page *page) | ||
633 | { | ||
634 | struct list_head *list = page->list.prev; | ||
635 | |||
636 | return rb_list_head(list->next) != &page->list; | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * rb_set_list_to_head - set a list_head to be pointing to head. | ||
641 | */ | ||
642 | static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, | ||
643 | struct list_head *list) | ||
644 | { | ||
645 | unsigned long *ptr; | ||
646 | |||
647 | ptr = (unsigned long *)&list->next; | ||
648 | *ptr |= RB_PAGE_HEAD; | ||
649 | *ptr &= ~RB_PAGE_UPDATE; | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * rb_head_page_activate - sets up head page | ||
654 | */ | ||
655 | static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) | ||
656 | { | ||
657 | struct buffer_page *head; | ||
658 | |||
659 | head = cpu_buffer->head_page; | ||
660 | if (!head) | ||
661 | return; | ||
662 | |||
663 | /* | ||
664 | * Set the previous list pointer to have the HEAD flag. | ||
665 | */ | ||
666 | rb_set_list_to_head(cpu_buffer, head->list.prev); | ||
667 | } | ||
668 | |||
669 | static void rb_list_head_clear(struct list_head *list) | ||
670 | { | ||
671 | unsigned long *ptr = (unsigned long *)&list->next; | ||
672 | |||
673 | *ptr &= ~RB_FLAG_MASK; | ||
674 | } | ||
675 | |||
676 | /* | ||
677 | * rb_head_page_dactivate - clears head page ptr (for free list) | ||
678 | */ | ||
679 | static void | ||
680 | rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) | ||
681 | { | ||
682 | struct list_head *hd; | ||
683 | |||
684 | /* Go through the whole list and clear any pointers found. */ | ||
685 | rb_list_head_clear(cpu_buffer->pages); | ||
686 | |||
687 | list_for_each(hd, cpu_buffer->pages) | ||
688 | rb_list_head_clear(hd); | ||
689 | } | ||
690 | |||
691 | static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, | ||
692 | struct buffer_page *head, | ||
693 | struct buffer_page *prev, | ||
694 | int old_flag, int new_flag) | ||
695 | { | ||
696 | struct list_head *list; | ||
697 | unsigned long val = (unsigned long)&head->list; | ||
698 | unsigned long ret; | ||
699 | |||
700 | list = &prev->list; | ||
701 | |||
702 | val &= ~RB_FLAG_MASK; | ||
703 | |||
704 | ret = (unsigned long)cmpxchg(&list->next, | ||
705 | val | old_flag, val | new_flag); | ||
706 | |||
707 | /* check if the reader took the page */ | ||
708 | if ((ret & ~RB_FLAG_MASK) != val) | ||
709 | return RB_PAGE_MOVED; | ||
710 | |||
711 | return ret & RB_FLAG_MASK; | ||
712 | } | ||
713 | |||
714 | static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, | ||
715 | struct buffer_page *head, | ||
716 | struct buffer_page *prev, | ||
717 | int old_flag) | ||
718 | { | ||
719 | return rb_head_page_set(cpu_buffer, head, prev, | ||
720 | old_flag, RB_PAGE_UPDATE); | ||
721 | } | ||
722 | |||
723 | static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, | ||
724 | struct buffer_page *head, | ||
725 | struct buffer_page *prev, | ||
726 | int old_flag) | ||
727 | { | ||
728 | return rb_head_page_set(cpu_buffer, head, prev, | ||
729 | old_flag, RB_PAGE_HEAD); | ||
730 | } | ||
731 | |||
732 | static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, | ||
733 | struct buffer_page *head, | ||
734 | struct buffer_page *prev, | ||
735 | int old_flag) | ||
736 | { | ||
737 | return rb_head_page_set(cpu_buffer, head, prev, | ||
738 | old_flag, RB_PAGE_NORMAL); | ||
739 | } | ||
740 | |||
741 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
742 | struct buffer_page **bpage) | ||
743 | { | ||
744 | struct list_head *p = rb_list_head((*bpage)->list.next); | ||
745 | |||
746 | *bpage = list_entry(p, struct buffer_page, list); | ||
747 | } | ||
748 | |||
749 | static struct buffer_page * | ||
750 | rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) | ||
751 | { | ||
752 | struct buffer_page *head; | ||
753 | struct buffer_page *page; | ||
754 | struct list_head *list; | ||
755 | int i; | ||
756 | |||
757 | if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) | ||
758 | return NULL; | ||
759 | |||
760 | /* sanity check */ | ||
761 | list = cpu_buffer->pages; | ||
762 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) | ||
763 | return NULL; | ||
764 | |||
765 | page = head = cpu_buffer->head_page; | ||
766 | /* | ||
767 | * It is possible that the writer moves the header behind | ||
768 | * where we started, and we miss in one loop. | ||
769 | * A second loop should grab the header, but we'll do | ||
770 | * three loops just because I'm paranoid. | ||
771 | */ | ||
772 | for (i = 0; i < 3; i++) { | ||
773 | do { | ||
774 | if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { | ||
775 | cpu_buffer->head_page = page; | ||
776 | return page; | ||
777 | } | ||
778 | rb_inc_page(cpu_buffer, &page); | ||
779 | } while (page != head); | ||
780 | } | ||
781 | |||
782 | RB_WARN_ON(cpu_buffer, 1); | ||
783 | |||
784 | return NULL; | ||
785 | } | ||
786 | |||
787 | static int rb_head_page_replace(struct buffer_page *old, | ||
788 | struct buffer_page *new) | ||
789 | { | ||
790 | unsigned long *ptr = (unsigned long *)&old->list.prev->next; | ||
791 | unsigned long val; | ||
792 | unsigned long ret; | ||
793 | |||
794 | val = *ptr & ~RB_FLAG_MASK; | ||
795 | val |= RB_PAGE_HEAD; | ||
796 | |||
797 | ret = cmpxchg(ptr, val, &new->list); | ||
798 | |||
799 | return ret == val; | ||
800 | } | ||
801 | |||
802 | /* | ||
803 | * rb_tail_page_update - move the tail page forward | ||
804 | * | ||
805 | * Returns 1 if moved tail page, 0 if someone else did. | ||
806 | */ | ||
807 | static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, | ||
808 | struct buffer_page *tail_page, | ||
809 | struct buffer_page *next_page) | ||
810 | { | ||
811 | struct buffer_page *old_tail; | ||
812 | unsigned long old_entries; | ||
813 | unsigned long old_write; | ||
814 | int ret = 0; | ||
815 | |||
816 | /* | ||
817 | * The tail page now needs to be moved forward. | ||
818 | * | ||
819 | * We need to reset the tail page, but without messing | ||
820 | * with possible erasing of data brought in by interrupts | ||
821 | * that have moved the tail page and are currently on it. | ||
822 | * | ||
823 | * We add a counter to the write field to denote this. | ||
824 | */ | ||
825 | old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); | ||
826 | old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); | ||
827 | |||
828 | /* | ||
829 | * Just make sure we have seen our old_write and synchronize | ||
830 | * with any interrupts that come in. | ||
831 | */ | ||
832 | barrier(); | ||
833 | |||
834 | /* | ||
835 | * If the tail page is still the same as what we think | ||
836 | * it is, then it is up to us to update the tail | ||
837 | * pointer. | ||
838 | */ | ||
839 | if (tail_page == cpu_buffer->tail_page) { | ||
840 | /* Zero the write counter */ | ||
841 | unsigned long val = old_write & ~RB_WRITE_MASK; | ||
842 | unsigned long eval = old_entries & ~RB_WRITE_MASK; | ||
843 | |||
844 | /* | ||
845 | * This will only succeed if an interrupt did | ||
846 | * not come in and change it. In which case, we | ||
847 | * do not want to modify it. | ||
848 | * | ||
849 | * We add (void) to let the compiler know that we do not care | ||
850 | * about the return value of these functions. We use the | ||
851 | * cmpxchg to only update if an interrupt did not already | ||
852 | * do it for us. If the cmpxchg fails, we don't care. | ||
853 | */ | ||
854 | (void)local_cmpxchg(&next_page->write, old_write, val); | ||
855 | (void)local_cmpxchg(&next_page->entries, old_entries, eval); | ||
856 | |||
857 | /* | ||
858 | * No need to worry about races with clearing out the commit. | ||
859 | * it only can increment when a commit takes place. But that | ||
860 | * only happens in the outer most nested commit. | ||
861 | */ | ||
862 | local_set(&next_page->page->commit, 0); | ||
863 | |||
864 | old_tail = cmpxchg(&cpu_buffer->tail_page, | ||
865 | tail_page, next_page); | ||
866 | |||
867 | if (old_tail == tail_page) | ||
868 | ret = 1; | ||
869 | } | ||
870 | |||
871 | return ret; | ||
872 | } | ||
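rb_tail_page_update above leans on the packed counter: it bumps the updater bits, then resets the index with a cmpxchg that only succeeds if no nested writer touched the field in between. The same pattern, reproduced with C11 atomics as a rough stand-in for local_t (illustrative values and names):

#include <assert.h>
#include <stdatomic.h>

#define WRITE_MASK   0xfffffUL
#define WRITE_INTCNT (1UL << 20)

static atomic_ulong write_field;        /* stands in for next_page->write */

int main(void)
{
        unsigned long old, expected;

        atomic_store(&write_field, 123);        /* stale index from an old pass */

        /* announce ourselves as an updater; old mimics local_add_return() */
        old = atomic_fetch_add(&write_field, WRITE_INTCNT) + WRITE_INTCNT;

        /* a nested writer (think: interrupt) reserves 40 bytes meanwhile */
        atomic_fetch_add(&write_field, 40);

        /* try to zero the index while keeping the updater bits */
        expected = old;
        if (!atomic_compare_exchange_strong(&write_field, &expected,
                                            old & ~WRITE_MASK)) {
                /* it failed, so the nested writer's reservation is intact */
                assert((atomic_load(&write_field) & WRITE_MASK) == 123 + 40);
        }
        return 0;
}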
873 | |||
874 | static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, | ||
875 | struct buffer_page *bpage) | ||
876 | { | ||
877 | unsigned long val = (unsigned long)bpage; | ||
878 | |||
879 | if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) | ||
880 | return 1; | ||
881 | |||
882 | return 0; | ||
883 | } | ||
884 | |||
885 | /** | ||
886 | * rb_check_list - make sure a pointer to a list has the last bits zero | ||
887 | */ | ||
888 | static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, | ||
889 | struct list_head *list) | ||
890 | { | ||
891 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) | ||
892 | return 1; | ||
893 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) | ||
894 | return 1; | ||
895 | return 0; | ||
896 | } | ||
897 | |||
489 | /** | 898 | /** |
490 | * check_pages - integrity check of buffer pages | 899 | * check_pages - integrity check of buffer pages |
491 | * @cpu_buffer: CPU buffer with pages to test | 900 | * @cpu_buffer: CPU buffer with pages to test |
@@ -495,14 +904,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | |||
495 | */ | 904 | */ |
496 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 905 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
497 | { | 906 | { |
498 | struct list_head *head = &cpu_buffer->pages; | 907 | struct list_head *head = cpu_buffer->pages; |
499 | struct buffer_page *bpage, *tmp; | 908 | struct buffer_page *bpage, *tmp; |
500 | 909 | ||
910 | rb_head_page_deactivate(cpu_buffer); | ||
911 | |||
501 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) | 912 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) |
502 | return -1; | 913 | return -1; |
503 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) | 914 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) |
504 | return -1; | 915 | return -1; |
505 | 916 | ||
917 | if (rb_check_list(cpu_buffer, head)) | ||
918 | return -1; | ||
919 | |||
506 | list_for_each_entry_safe(bpage, tmp, head, list) { | 920 | list_for_each_entry_safe(bpage, tmp, head, list) { |
507 | if (RB_WARN_ON(cpu_buffer, | 921 | if (RB_WARN_ON(cpu_buffer, |
508 | bpage->list.next->prev != &bpage->list)) | 922 | bpage->list.next->prev != &bpage->list)) |
@@ -510,25 +924,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | |||
510 | if (RB_WARN_ON(cpu_buffer, | 924 | if (RB_WARN_ON(cpu_buffer, |
511 | bpage->list.prev->next != &bpage->list)) | 925 | bpage->list.prev->next != &bpage->list)) |
512 | return -1; | 926 | return -1; |
927 | if (rb_check_list(cpu_buffer, &bpage->list)) | ||
928 | return -1; | ||
513 | } | 929 | } |
514 | 930 | ||
931 | rb_head_page_activate(cpu_buffer); | ||
932 | |||
515 | return 0; | 933 | return 0; |
516 | } | 934 | } |
517 | 935 | ||
518 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | 936 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, |
519 | unsigned nr_pages) | 937 | unsigned nr_pages) |
520 | { | 938 | { |
521 | struct list_head *head = &cpu_buffer->pages; | ||
522 | struct buffer_page *bpage, *tmp; | 939 | struct buffer_page *bpage, *tmp; |
523 | unsigned long addr; | 940 | unsigned long addr; |
524 | LIST_HEAD(pages); | 941 | LIST_HEAD(pages); |
525 | unsigned i; | 942 | unsigned i; |
526 | 943 | ||
944 | WARN_ON(!nr_pages); | ||
945 | |||
527 | for (i = 0; i < nr_pages; i++) { | 946 | for (i = 0; i < nr_pages; i++) { |
528 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 947 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
529 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 948 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); |
530 | if (!bpage) | 949 | if (!bpage) |
531 | goto free_pages; | 950 | goto free_pages; |
951 | |||
952 | rb_check_bpage(cpu_buffer, bpage); | ||
953 | |||
532 | list_add(&bpage->list, &pages); | 954 | list_add(&bpage->list, &pages); |
533 | 955 | ||
534 | addr = __get_free_page(GFP_KERNEL); | 956 | addr = __get_free_page(GFP_KERNEL); |
@@ -538,7 +960,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
538 | rb_init_page(bpage->page); | 960 | rb_init_page(bpage->page); |
539 | } | 961 | } |
540 | 962 | ||
541 | list_splice(&pages, head); | 963 | /* |
964 | * The ring buffer page list is a circular list that does not | ||
965 | * start and end with a list head. All page list items point to | ||
966 | * other pages. | ||
967 | */ | ||
968 | cpu_buffer->pages = pages.next; | ||
969 | list_del(&pages); | ||
542 | 970 | ||
543 | rb_check_pages(cpu_buffer); | 971 | rb_check_pages(cpu_buffer); |
544 | 972 | ||
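The comment in this hunk is the key data-structure change: cpu_buffer->pages now points at a real buffer page and the ring has no dedicated head node. A small standalone demo of why taking pages.next and then list_del() on the temporary head leaves exactly that headless ring (minimal list helpers stand in for <linux/list.h>; illustrative only):

#include <assert.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

static void list_del(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
}

int main(void)
{
        struct list_head tmp, a, b, c;

        list_init(&tmp);
        list_add_tail(&a, &tmp);
        list_add_tail(&b, &tmp);
        list_add_tail(&c, &tmp);

        struct list_head *pages = tmp.next;     /* first real page */
        list_del(&tmp);                         /* drop the temporary head */

        /* the pages now form a closed ring with no list head in it */
        assert(pages == &a);
        assert(a.next == &b && b.next == &c && c.next == &a);
        assert(c.prev == &b && a.prev == &c);
        return 0;
}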
@@ -570,13 +998,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
570 | spin_lock_init(&cpu_buffer->reader_lock); | 998 | spin_lock_init(&cpu_buffer->reader_lock); |
571 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | 999 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); |
572 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 1000 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; |
573 | INIT_LIST_HEAD(&cpu_buffer->pages); | ||
574 | 1001 | ||
575 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1002 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
576 | GFP_KERNEL, cpu_to_node(cpu)); | 1003 | GFP_KERNEL, cpu_to_node(cpu)); |
577 | if (!bpage) | 1004 | if (!bpage) |
578 | goto fail_free_buffer; | 1005 | goto fail_free_buffer; |
579 | 1006 | ||
1007 | rb_check_bpage(cpu_buffer, bpage); | ||
1008 | |||
580 | cpu_buffer->reader_page = bpage; | 1009 | cpu_buffer->reader_page = bpage; |
581 | addr = __get_free_page(GFP_KERNEL); | 1010 | addr = __get_free_page(GFP_KERNEL); |
582 | if (!addr) | 1011 | if (!addr) |
@@ -591,9 +1020,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
591 | goto fail_free_reader; | 1020 | goto fail_free_reader; |
592 | 1021 | ||
593 | cpu_buffer->head_page | 1022 | cpu_buffer->head_page |
594 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 1023 | = list_entry(cpu_buffer->pages, struct buffer_page, list); |
595 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; | 1024 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; |
596 | 1025 | ||
1026 | rb_head_page_activate(cpu_buffer); | ||
1027 | |||
597 | return cpu_buffer; | 1028 | return cpu_buffer; |
598 | 1029 | ||
599 | fail_free_reader: | 1030 | fail_free_reader: |
@@ -606,24 +1037,25 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
606 | 1037 | ||
607 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | 1038 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) |
608 | { | 1039 | { |
609 | struct list_head *head = &cpu_buffer->pages; | 1040 | struct list_head *head = cpu_buffer->pages; |
610 | struct buffer_page *bpage, *tmp; | 1041 | struct buffer_page *bpage, *tmp; |
611 | 1042 | ||
612 | free_buffer_page(cpu_buffer->reader_page); | 1043 | free_buffer_page(cpu_buffer->reader_page); |
613 | 1044 | ||
614 | list_for_each_entry_safe(bpage, tmp, head, list) { | 1045 | rb_head_page_deactivate(cpu_buffer); |
615 | list_del_init(&bpage->list); | 1046 | |
1047 | if (head) { | ||
1048 | list_for_each_entry_safe(bpage, tmp, head, list) { | ||
1049 | list_del_init(&bpage->list); | ||
1050 | free_buffer_page(bpage); | ||
1051 | } | ||
1052 | bpage = list_entry(head, struct buffer_page, list); | ||
616 | free_buffer_page(bpage); | 1053 | free_buffer_page(bpage); |
617 | } | 1054 | } |
1055 | |||
618 | kfree(cpu_buffer); | 1056 | kfree(cpu_buffer); |
619 | } | 1057 | } |
620 | 1058 | ||
621 | /* | ||
622 | * Causes compile errors if the struct buffer_page gets bigger | ||
623 | * than the struct page. | ||
624 | */ | ||
625 | extern int ring_buffer_page_too_big(void); | ||
626 | |||
627 | #ifdef CONFIG_HOTPLUG_CPU | 1059 | #ifdef CONFIG_HOTPLUG_CPU |
628 | static int rb_cpu_notify(struct notifier_block *self, | 1060 | static int rb_cpu_notify(struct notifier_block *self, |
629 | unsigned long action, void *hcpu); | 1061 | unsigned long action, void *hcpu); |
@@ -646,11 +1078,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
646 | int bsize; | 1078 | int bsize; |
647 | int cpu; | 1079 | int cpu; |
648 | 1080 | ||
649 | /* Paranoid! Optimizes out when all is well */ | ||
650 | if (sizeof(struct buffer_page) > sizeof(struct page)) | ||
651 | ring_buffer_page_too_big(); | ||
652 | |||
653 | |||
654 | /* keep it in its own cache line */ | 1081 | /* keep it in its own cache line */ |
655 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), | 1082 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), |
656 | GFP_KERNEL); | 1083 | GFP_KERNEL); |
@@ -666,8 +1093,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
666 | buffer->reader_lock_key = key; | 1093 | buffer->reader_lock_key = key; |
667 | 1094 | ||
668 | /* need at least two pages */ | 1095 | /* need at least two pages */ |
669 | if (buffer->pages == 1) | 1096 | if (buffer->pages < 2) |
670 | buffer->pages++; | 1097 | buffer->pages = 2; |
671 | 1098 | ||
672 | /* | 1099 | /* |
673 | * In case of non-hotplug cpu, if the ring-buffer is allocated | 1100 | * In case of non-hotplug cpu, if the ring-buffer is allocated |
@@ -743,6 +1170,7 @@ ring_buffer_free(struct ring_buffer *buffer) | |||
743 | 1170 | ||
744 | put_online_cpus(); | 1171 | put_online_cpus(); |
745 | 1172 | ||
1173 | kfree(buffer->buffers); | ||
746 | free_cpumask_var(buffer->cpumask); | 1174 | free_cpumask_var(buffer->cpumask); |
747 | 1175 | ||
748 | kfree(buffer); | 1176 | kfree(buffer); |
@@ -767,15 +1195,17 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | |||
767 | atomic_inc(&cpu_buffer->record_disabled); | 1195 | atomic_inc(&cpu_buffer->record_disabled); |
768 | synchronize_sched(); | 1196 | synchronize_sched(); |
769 | 1197 | ||
1198 | rb_head_page_deactivate(cpu_buffer); | ||
1199 | |||
770 | for (i = 0; i < nr_pages; i++) { | 1200 | for (i = 0; i < nr_pages; i++) { |
771 | if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) | 1201 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) |
772 | return; | 1202 | return; |
773 | p = cpu_buffer->pages.next; | 1203 | p = cpu_buffer->pages->next; |
774 | bpage = list_entry(p, struct buffer_page, list); | 1204 | bpage = list_entry(p, struct buffer_page, list); |
775 | list_del_init(&bpage->list); | 1205 | list_del_init(&bpage->list); |
776 | free_buffer_page(bpage); | 1206 | free_buffer_page(bpage); |
777 | } | 1207 | } |
778 | if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) | 1208 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) |
779 | return; | 1209 | return; |
780 | 1210 | ||
781 | rb_reset_cpu(cpu_buffer); | 1211 | rb_reset_cpu(cpu_buffer); |
@@ -797,15 +1227,19 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
797 | atomic_inc(&cpu_buffer->record_disabled); | 1227 | atomic_inc(&cpu_buffer->record_disabled); |
798 | synchronize_sched(); | 1228 | synchronize_sched(); |
799 | 1229 | ||
1230 | spin_lock_irq(&cpu_buffer->reader_lock); | ||
1231 | rb_head_page_deactivate(cpu_buffer); | ||
1232 | |||
800 | for (i = 0; i < nr_pages; i++) { | 1233 | for (i = 0; i < nr_pages; i++) { |
801 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) | 1234 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) |
802 | return; | 1235 | return; |
803 | p = pages->next; | 1236 | p = pages->next; |
804 | bpage = list_entry(p, struct buffer_page, list); | 1237 | bpage = list_entry(p, struct buffer_page, list); |
805 | list_del_init(&bpage->list); | 1238 | list_del_init(&bpage->list); |
806 | list_add_tail(&bpage->list, &cpu_buffer->pages); | 1239 | list_add_tail(&bpage->list, cpu_buffer->pages); |
807 | } | 1240 | } |
808 | rb_reset_cpu(cpu_buffer); | 1241 | rb_reset_cpu(cpu_buffer); |
1242 | spin_unlock_irq(&cpu_buffer->reader_lock); | ||
809 | 1243 | ||
810 | rb_check_pages(cpu_buffer); | 1244 | rb_check_pages(cpu_buffer); |
811 | 1245 | ||
@@ -956,21 +1390,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) | |||
956 | } | 1390 | } |
957 | 1391 | ||
958 | static inline struct ring_buffer_event * | 1392 | static inline struct ring_buffer_event * |
959 | rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) | ||
960 | { | ||
961 | return __rb_page_index(cpu_buffer->head_page, | ||
962 | cpu_buffer->head_page->read); | ||
963 | } | ||
964 | |||
965 | static inline struct ring_buffer_event * | ||
966 | rb_iter_head_event(struct ring_buffer_iter *iter) | 1393 | rb_iter_head_event(struct ring_buffer_iter *iter) |
967 | { | 1394 | { |
968 | return __rb_page_index(iter->head_page, iter->head); | 1395 | return __rb_page_index(iter->head_page, iter->head); |
969 | } | 1396 | } |
970 | 1397 | ||
971 | static inline unsigned rb_page_write(struct buffer_page *bpage) | 1398 | static inline unsigned long rb_page_write(struct buffer_page *bpage) |
972 | { | 1399 | { |
973 | return local_read(&bpage->write); | 1400 | return local_read(&bpage->write) & RB_WRITE_MASK; |
974 | } | 1401 | } |
975 | 1402 | ||
976 | static inline unsigned rb_page_commit(struct buffer_page *bpage) | 1403 | static inline unsigned rb_page_commit(struct buffer_page *bpage) |
@@ -978,6 +1405,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage) | |||
978 | return local_read(&bpage->page->commit); | 1405 | return local_read(&bpage->page->commit); |
979 | } | 1406 | } |
980 | 1407 | ||
1408 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) | ||
1409 | { | ||
1410 | return local_read(&bpage->entries) & RB_WRITE_MASK; | ||
1411 | } | ||
1412 | |||
981 | /* Size is determined by what has been commited */ | 1413 | /* Size is determined by what has been commited */ |
982 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1414 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
983 | { | 1415 | { |
@@ -990,33 +1422,17 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) | |||
990 | return rb_page_commit(cpu_buffer->commit_page); | 1422 | return rb_page_commit(cpu_buffer->commit_page); |
991 | } | 1423 | } |
992 | 1424 | ||
993 | static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) | ||
994 | { | ||
995 | return rb_page_commit(cpu_buffer->head_page); | ||
996 | } | ||
997 | |||
998 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
999 | struct buffer_page **bpage) | ||
1000 | { | ||
1001 | struct list_head *p = (*bpage)->list.next; | ||
1002 | |||
1003 | if (p == &cpu_buffer->pages) | ||
1004 | p = p->next; | ||
1005 | |||
1006 | *bpage = list_entry(p, struct buffer_page, list); | ||
1007 | } | ||
1008 | |||
1009 | static inline unsigned | 1425 | static inline unsigned |
1010 | rb_event_index(struct ring_buffer_event *event) | 1426 | rb_event_index(struct ring_buffer_event *event) |
1011 | { | 1427 | { |
1012 | unsigned long addr = (unsigned long)event; | 1428 | unsigned long addr = (unsigned long)event; |
1013 | 1429 | ||
1014 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 1430 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; |
1015 | } | 1431 | } |
1016 | 1432 | ||
1017 | static inline int | 1433 | static inline int |
1018 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1434 | rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1019 | struct ring_buffer_event *event) | 1435 | struct ring_buffer_event *event) |
1020 | { | 1436 | { |
1021 | unsigned long addr = (unsigned long)event; | 1437 | unsigned long addr = (unsigned long)event; |
1022 | unsigned long index; | 1438 | unsigned long index; |
@@ -1029,33 +1445,10 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1029 | } | 1445 | } |
1030 | 1446 | ||
1031 | static void | 1447 | static void |
1032 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | ||
1033 | struct ring_buffer_event *event) | ||
1034 | { | ||
1035 | unsigned long addr = (unsigned long)event; | ||
1036 | unsigned long index; | ||
1037 | |||
1038 | index = rb_event_index(event); | ||
1039 | addr &= PAGE_MASK; | ||
1040 | |||
1041 | while (cpu_buffer->commit_page->page != (void *)addr) { | ||
1042 | if (RB_WARN_ON(cpu_buffer, | ||
1043 | cpu_buffer->commit_page == cpu_buffer->tail_page)) | ||
1044 | return; | ||
1045 | cpu_buffer->commit_page->page->commit = | ||
1046 | cpu_buffer->commit_page->write; | ||
1047 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | ||
1048 | cpu_buffer->write_stamp = | ||
1049 | cpu_buffer->commit_page->page->time_stamp; | ||
1050 | } | ||
1051 | |||
1052 | /* Now set the commit to the event's index */ | ||
1053 | local_set(&cpu_buffer->commit_page->page->commit, index); | ||
1054 | } | ||
1055 | |||
1056 | static void | ||
1057 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1448 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
1058 | { | 1449 | { |
1450 | unsigned long max_count; | ||
1451 | |||
1059 | /* | 1452 | /* |
1060 | * We only race with interrupts and NMIs on this CPU. | 1453 | * We only race with interrupts and NMIs on this CPU. |
1061 | * If we own the commit event, then we can commit | 1454 | * If we own the commit event, then we can commit |
@@ -1065,9 +1458,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | |||
1065 | * assign the commit to the tail. | 1458 | * assign the commit to the tail. |
1066 | */ | 1459 | */ |
1067 | again: | 1460 | again: |
1461 | max_count = cpu_buffer->buffer->pages * 100; | ||
1462 | |||
1068 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { | 1463 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { |
1069 | cpu_buffer->commit_page->page->commit = | 1464 | if (RB_WARN_ON(cpu_buffer, !(--max_count))) |
1070 | cpu_buffer->commit_page->write; | 1465 | return; |
1466 | if (RB_WARN_ON(cpu_buffer, | ||
1467 | rb_is_reader_page(cpu_buffer->tail_page))) | ||
1468 | return; | ||
1469 | local_set(&cpu_buffer->commit_page->page->commit, | ||
1470 | rb_page_write(cpu_buffer->commit_page)); | ||
1071 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 1471 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); |
1072 | cpu_buffer->write_stamp = | 1472 | cpu_buffer->write_stamp = |
1073 | cpu_buffer->commit_page->page->time_stamp; | 1473 | cpu_buffer->commit_page->page->time_stamp; |
@@ -1076,8 +1476,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | |||
1076 | } | 1476 | } |
1077 | while (rb_commit_index(cpu_buffer) != | 1477 | while (rb_commit_index(cpu_buffer) != |
1078 | rb_page_write(cpu_buffer->commit_page)) { | 1478 | rb_page_write(cpu_buffer->commit_page)) { |
1079 | cpu_buffer->commit_page->page->commit = | 1479 | |
1080 | cpu_buffer->commit_page->write; | 1480 | local_set(&cpu_buffer->commit_page->page->commit, |
1481 | rb_page_write(cpu_buffer->commit_page)); | ||
1482 | RB_WARN_ON(cpu_buffer, | ||
1483 | local_read(&cpu_buffer->commit_page->page->commit) & | ||
1484 | ~RB_WRITE_MASK); | ||
1081 | barrier(); | 1485 | barrier(); |
1082 | } | 1486 | } |
1083 | 1487 | ||
@@ -1110,7 +1514,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1110 | * to the head page instead of next. | 1514 | * to the head page instead of next. |
1111 | */ | 1515 | */ |
1112 | if (iter->head_page == cpu_buffer->reader_page) | 1516 | if (iter->head_page == cpu_buffer->reader_page) |
1113 | iter->head_page = cpu_buffer->head_page; | 1517 | iter->head_page = rb_set_head_page(cpu_buffer); |
1114 | else | 1518 | else |
1115 | rb_inc_page(cpu_buffer, &iter->head_page); | 1519 | rb_inc_page(cpu_buffer, &iter->head_page); |
1116 | 1520 | ||
@@ -1154,6 +1558,163 @@ rb_update_event(struct ring_buffer_event *event, | |||
1154 | } | 1558 | } |
1155 | } | 1559 | } |
1156 | 1560 | ||
1561 | /* | ||
1562 | * rb_handle_head_page - writer hit the head page | ||
1563 | * | ||
1564 | * Returns: +1 to retry page | ||
1565 | * 0 to continue | ||
1566 | * -1 on error | ||
1567 | */ | ||
1568 | static int | ||
1569 | rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
1570 | struct buffer_page *tail_page, | ||
1571 | struct buffer_page *next_page) | ||
1572 | { | ||
1573 | struct buffer_page *new_head; | ||
1574 | int entries; | ||
1575 | int type; | ||
1576 | int ret; | ||
1577 | |||
1578 | entries = rb_page_entries(next_page); | ||
1579 | |||
1580 | /* | ||
1581 | * The hard part is here. We need to move the head | ||
1582 | * forward, and protect against both readers on | ||
1583 | * other CPUs and writers coming in via interrupts. | ||
1584 | */ | ||
1585 | type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, | ||
1586 | RB_PAGE_HEAD); | ||
1587 | |||
1588 | /* | ||
1589 | * type can be one of four: | ||
1590 | * NORMAL - an interrupt already moved it for us | ||
1591 | * HEAD - we are the first to get here. | ||
1592 | * UPDATE - we are the interrupt interrupting | ||
1593 | * a current move. | ||
1594 | * MOVED - a reader on another CPU moved the next | ||
1595 | * pointer to its reader page. Give up | ||
1596 | * and try again. | ||
1597 | */ | ||
1598 | |||
1599 | switch (type) { | ||
1600 | case RB_PAGE_HEAD: | ||
1601 | /* | ||
1602 | * We changed the head to UPDATE, thus | ||
1603 | * it is our responsibility to update | ||
1604 | * the counters. | ||
1605 | */ | ||
1606 | local_add(entries, &cpu_buffer->overrun); | ||
1607 | |||
1608 | /* | ||
1609 | * The entries will be zeroed out when we move the | ||
1610 | * tail page. | ||
1611 | */ | ||
1612 | |||
1613 | /* still more to do */ | ||
1614 | break; | ||
1615 | |||
1616 | case RB_PAGE_UPDATE: | ||
1617 | /* | ||
1618 | * This is an interrupt that interrupt the | ||
1619 | * previous update. Still more to do. | ||
1620 | */ | ||
1621 | break; | ||
1622 | case RB_PAGE_NORMAL: | ||
1623 | /* | ||
1624 | * An interrupt came in before the update | ||
1625 | * and processed this for us. | ||
1626 | * Nothing left to do. | ||
1627 | */ | ||
1628 | return 1; | ||
1629 | case RB_PAGE_MOVED: | ||
1630 | /* | ||
1631 | * The reader is on another CPU and just did | ||
1632 | * a swap with our next_page. | ||
1633 | * Try again. | ||
1634 | */ | ||
1635 | return 1; | ||
1636 | default: | ||
1637 | RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ | ||
1638 | return -1; | ||
1639 | } | ||
1640 | |||
1641 | /* | ||
1642 | * Now that we are here, the old head pointer is | ||
1643 | * set to UPDATE. This will keep the reader from | ||
1644 | * swapping the head page with the reader page. | ||
1645 | * The reader (on another CPU) will spin till | ||
1646 | * we are finished. | ||
1647 | * | ||
1648 | * We just need to protect against interrupts | ||
1649 | * doing the job. We will set the next pointer | ||
1650 | * to HEAD. After that, we set the old pointer | ||
1651 | * to NORMAL, but only if it was HEAD before. | ||
1652 | * otherwise we are an interrupt, and only | ||
1653 | * want the outer most commit to reset it. | ||
1654 | */ | ||
1655 | new_head = next_page; | ||
1656 | rb_inc_page(cpu_buffer, &new_head); | ||
1657 | |||
1658 | ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, | ||
1659 | RB_PAGE_NORMAL); | ||
1660 | |||
1661 | /* | ||
1662 | * Valid returns are: | ||
1663 | * HEAD - an interrupt came in and already set it. | ||
1664 | * NORMAL - One of two things: | ||
1665 | * 1) We really set it. | ||
1666 | * 2) A bunch of interrupts came in and moved | ||
1667 | * the page forward again. | ||
1668 | */ | ||
1669 | switch (ret) { | ||
1670 | case RB_PAGE_HEAD: | ||
1671 | case RB_PAGE_NORMAL: | ||
1672 | /* OK */ | ||
1673 | break; | ||
1674 | default: | ||
1675 | RB_WARN_ON(cpu_buffer, 1); | ||
1676 | return -1; | ||
1677 | } | ||
1678 | |||
1679 | /* | ||
1680 | * It is possible that an interrupt came in, | ||
1681 | * set the head up, then more interrupts came in | ||
1682 | * and moved it again. When we get back here, | ||
1683 | * the page would have been set to NORMAL but we | ||
1684 | * just set it back to HEAD. | ||
1685 | * | ||
1686 | * How do you detect this? Well, if that happened | ||
1687 | * the tail page would have moved. | ||
1688 | */ | ||
1689 | if (ret == RB_PAGE_NORMAL) { | ||
1690 | /* | ||
1691 | * If the tail had moved passed next, then we need | ||
1692 | * to reset the pointer. | ||
1693 | */ | ||
1694 | if (cpu_buffer->tail_page != tail_page && | ||
1695 | cpu_buffer->tail_page != next_page) | ||
1696 | rb_head_page_set_normal(cpu_buffer, new_head, | ||
1697 | next_page, | ||
1698 | RB_PAGE_HEAD); | ||
1699 | } | ||
1700 | |||
1701 | /* | ||
1702 | * If this was the outer most commit (the one that | ||
1703 | * changed the original pointer from HEAD to UPDATE), | ||
1704 | * then it is up to us to reset it to NORMAL. | ||
1705 | */ | ||
1706 | if (type == RB_PAGE_HEAD) { | ||
1707 | ret = rb_head_page_set_normal(cpu_buffer, next_page, | ||
1708 | tail_page, | ||
1709 | RB_PAGE_UPDATE); | ||
1710 | if (RB_WARN_ON(cpu_buffer, | ||
1711 | ret != RB_PAGE_UPDATE)) | ||
1712 | return -1; | ||
1713 | } | ||
1714 | |||
1715 | return 0; | ||
1716 | } | ||
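The switch above reads more easily as a sequence of compare-and-swap transitions on a single state value: the outermost writer turns HEAD into UPDATE, tags the following page as the new HEAD, and only the outermost commit flips the old pointer back to NORMAL. A compressed sketch with C11 atomics (illustrative; the real code tags list pointers rather than a state word):

#include <assert.h>
#include <stdatomic.h>

enum { NORMAL, HEAD, UPDATE };

/* mimic cmpxchg: attempt from -> to, always return the value seen */
static int transition(atomic_int *state, int from, int to)
{
        int expected = from;
        return atomic_compare_exchange_strong(state, &expected, to)
                ? from : expected;
}

int main(void)
{
        atomic_int old_head = HEAD, new_head = NORMAL;

        /* claim the current head page */
        assert(transition(&old_head, HEAD, UPDATE) == HEAD);

        /* tag the next page as the new head */
        assert(transition(&new_head, NORMAL, HEAD) == NORMAL);

        /* outermost commit: UPDATE -> NORMAL must still see UPDATE */
        assert(transition(&old_head, UPDATE, NORMAL) == UPDATE);
        return 0;
}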
1717 | |||
1157 | static unsigned rb_calculate_event_length(unsigned length) | 1718 | static unsigned rb_calculate_event_length(unsigned length) |
1158 | { | 1719 | { |
1159 | struct ring_buffer_event event; /* Used only for sizeof array */ | 1720 | struct ring_buffer_event event; /* Used only for sizeof array */ |
@@ -1171,6 +1732,57 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
1171 | return length; | 1732 | return length; |
1172 | } | 1733 | } |
1173 | 1734 | ||
1735 | static inline void | ||
1736 | rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | ||
1737 | struct buffer_page *tail_page, | ||
1738 | unsigned long tail, unsigned long length) | ||
1739 | { | ||
1740 | struct ring_buffer_event *event; | ||
1741 | |||
1742 | /* | ||
1743 | * Only the event that crossed the page boundary | ||
1744 | * must fill the old tail_page with padding. | ||
1745 | */ | ||
1746 | if (tail >= BUF_PAGE_SIZE) { | ||
1747 | local_sub(length, &tail_page->write); | ||
1748 | return; | ||
1749 | } | ||
1750 | |||
1751 | event = __rb_page_index(tail_page, tail); | ||
1752 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1753 | |||
1754 | /* | ||
1755 | * If this event is bigger than the minimum size, then | ||
1756 | * we need to be careful that we don't subtract the | ||
1757 | * write counter enough to allow another writer to slip | ||
1758 | * in on this page. | ||
1759 | * We put in a discarded commit instead, to make sure | ||
1760 | * that this space is not used again. | ||
1761 | * | ||
1762 | * If we are less than the minimum size, we don't need to | ||
1763 | * worry about it. | ||
1764 | */ | ||
1765 | if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { | ||
1766 | /* No room for any events */ | ||
1767 | |||
1768 | /* Mark the rest of the page with padding */ | ||
1769 | rb_event_set_padding(event); | ||
1770 | |||
1771 | /* Set the write back to the previous setting */ | ||
1772 | local_sub(length, &tail_page->write); | ||
1773 | return; | ||
1774 | } | ||
1775 | |||
1776 | /* Put in a discarded event */ | ||
1777 | event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; | ||
1778 | event->type_len = RINGBUF_TYPE_PADDING; | ||
1779 | /* time delta must be non zero */ | ||
1780 | event->time_delta = 1; | ||
1781 | |||
1782 | /* Set write to end of buffer */ | ||
1783 | length = (tail + length) - BUF_PAGE_SIZE; | ||
1784 | local_sub(length, &tail_page->write); | ||
1785 | } | ||
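A worked example of the arithmetic in rb_reset_tail, assuming the usual 4096-byte page with a 16-byte buffer_data_page header (so BUF_PAGE_SIZE is 4080) and a 4-byte event header; the numbers are only for illustration:

#include <assert.h>

#define BUF_PAGE_SIZE    4080UL /* assumed: PAGE_SIZE minus the page header */
#define RB_EVNT_HDR_SIZE    4UL /* assumed event header size */

int main(void)
{
        unsigned long tail = 4000, length = 120;        /* this event crossed the page */
        unsigned long write = tail + length;            /* 4120, past the end */

        /* the discarded event swallows the rest of the old page */
        unsigned long pad_payload = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
        assert(pad_payload == 76);

        /* pull the write index back so it ends exactly at the page boundary */
        write -= (tail + length) - BUF_PAGE_SIZE;
        assert(write == BUF_PAGE_SIZE);
        return 0;
}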
1174 | 1786 | ||
1175 | static struct ring_buffer_event * | 1787 | static struct ring_buffer_event * |
1176 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1788 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -1178,128 +1790,101 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1178 | struct buffer_page *commit_page, | 1790 | struct buffer_page *commit_page, |
1179 | struct buffer_page *tail_page, u64 *ts) | 1791 | struct buffer_page *tail_page, u64 *ts) |
1180 | { | 1792 | { |
1181 | struct buffer_page *next_page, *head_page, *reader_page; | ||
1182 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1793 | struct ring_buffer *buffer = cpu_buffer->buffer; |
1183 | struct ring_buffer_event *event; | 1794 | struct buffer_page *next_page; |
1184 | bool lock_taken = false; | 1795 | int ret; |
1185 | unsigned long flags; | ||
1186 | 1796 | ||
1187 | next_page = tail_page; | 1797 | next_page = tail_page; |
1188 | 1798 | ||
1189 | local_irq_save(flags); | ||
1190 | /* | ||
1191 | * Since the write to the buffer is still not | ||
1192 | * fully lockless, we must be careful with NMIs. | ||
1193 | * The locks in the writers are taken when a write | ||
1194 | * crosses to a new page. The locks protect against | ||
1195 | * races with the readers (this will soon be fixed | ||
1196 | * with a lockless solution). | ||
1197 | * | ||
1198 | * Because we can not protect against NMIs, and we | ||
1199 | * want to keep traces reentrant, we need to manage | ||
1200 | * what happens when we are in an NMI. | ||
1201 | * | ||
1202 | * NMIs can happen after we take the lock. | ||
1203 | * If we are in an NMI, only take the lock | ||
1204 | * if it is not already taken. Otherwise | ||
1205 | * simply fail. | ||
1206 | */ | ||
1207 | if (unlikely(in_nmi())) { | ||
1208 | if (!__raw_spin_trylock(&cpu_buffer->lock)) { | ||
1209 | cpu_buffer->nmi_dropped++; | ||
1210 | goto out_reset; | ||
1211 | } | ||
1212 | } else | ||
1213 | __raw_spin_lock(&cpu_buffer->lock); | ||
1214 | |||
1215 | lock_taken = true; | ||
1216 | |||
1217 | rb_inc_page(cpu_buffer, &next_page); | 1799 | rb_inc_page(cpu_buffer, &next_page); |
1218 | 1800 | ||
1219 | head_page = cpu_buffer->head_page; | ||
1220 | reader_page = cpu_buffer->reader_page; | ||
1221 | |||
1222 | /* we grabbed the lock before incrementing */ | ||
1223 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | ||
1224 | goto out_reset; | ||
1225 | |||
1226 | /* | 1801 | /* |
1227 | * If for some reason, we had an interrupt storm that made | 1802 | * If for some reason, we had an interrupt storm that made |
1228 | * it all the way around the buffer, bail, and warn | 1803 | * it all the way around the buffer, bail, and warn |
1229 | * about it. | 1804 | * about it. |
1230 | */ | 1805 | */ |
1231 | if (unlikely(next_page == commit_page)) { | 1806 | if (unlikely(next_page == commit_page)) { |
1232 | cpu_buffer->commit_overrun++; | 1807 | local_inc(&cpu_buffer->commit_overrun); |
1233 | goto out_reset; | 1808 | goto out_reset; |
1234 | } | 1809 | } |
1235 | 1810 | ||
1236 | if (next_page == head_page) { | ||
1237 | if (!(buffer->flags & RB_FL_OVERWRITE)) | ||
1238 | goto out_reset; | ||
1239 | |||
1240 | /* tail_page has not moved yet? */ | ||
1241 | if (tail_page == cpu_buffer->tail_page) { | ||
1242 | /* count overflows */ | ||
1243 | cpu_buffer->overrun += | ||
1244 | local_read(&head_page->entries); | ||
1245 | |||
1246 | rb_inc_page(cpu_buffer, &head_page); | ||
1247 | cpu_buffer->head_page = head_page; | ||
1248 | cpu_buffer->head_page->read = 0; | ||
1249 | } | ||
1250 | } | ||
1251 | |||
1252 | /* | 1811 | /* |
1253 | * If the tail page is still the same as what we think | 1812 | * This is where the fun begins! |
1254 | * it is, then it is up to us to update the tail | 1813 | * |
1255 | * pointer. | 1814 | * We are fighting against races between a reader that |
1815 | * could be on another CPU trying to swap its reader | ||
1816 | * page with the buffer head. | ||
1817 | * | ||
1818 | * We are also fighting against interrupts coming in and | ||
1819 | * moving the head or tail on us as well. | ||
1820 | * | ||
1821 | * If the next page is the head page then we have filled | ||
1822 | * the buffer, unless the commit page is still on the | ||
1823 | * reader page. | ||
1256 | */ | 1824 | */ |
1257 | if (tail_page == cpu_buffer->tail_page) { | 1825 | if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { |
1258 | local_set(&next_page->write, 0); | ||
1259 | local_set(&next_page->entries, 0); | ||
1260 | local_set(&next_page->page->commit, 0); | ||
1261 | cpu_buffer->tail_page = next_page; | ||
1262 | 1826 | ||
1263 | /* reread the time stamp */ | 1827 | /* |
1264 | *ts = rb_time_stamp(buffer, cpu_buffer->cpu); | 1828 | * If the commit is not on the reader page, then |
1265 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1829 | * move the header page. |
1830 | */ | ||
1831 | if (!rb_is_reader_page(cpu_buffer->commit_page)) { | ||
1832 | /* | ||
1833 | * If we are not in overwrite mode, | ||
1834 | * this is easy, just stop here. | ||
1835 | */ | ||
1836 | if (!(buffer->flags & RB_FL_OVERWRITE)) | ||
1837 | goto out_reset; | ||
1838 | |||
1839 | ret = rb_handle_head_page(cpu_buffer, | ||
1840 | tail_page, | ||
1841 | next_page); | ||
1842 | if (ret < 0) | ||
1843 | goto out_reset; | ||
1844 | if (ret) | ||
1845 | goto out_again; | ||
1846 | } else { | ||
1847 | /* | ||
1848 | * We need to be careful here too. The | ||
1849 | * commit page could still be on the reader | ||
1850 | * page. We could have a small buffer, and | ||
1851 | * have filled up the buffer with events | ||
1852 | * from interrupts and such, and wrapped. | ||
1853 | * | ||
1854 | * Note, if the tail page is also the on the | ||
1855 | * reader_page, we let it move out. | ||
1856 | */ | ||
1857 | if (unlikely((cpu_buffer->commit_page != | ||
1858 | cpu_buffer->tail_page) && | ||
1859 | (cpu_buffer->commit_page == | ||
1860 | cpu_buffer->reader_page))) { | ||
1861 | local_inc(&cpu_buffer->commit_overrun); | ||
1862 | goto out_reset; | ||
1863 | } | ||
1864 | } | ||
1266 | } | 1865 | } |
1267 | 1866 | ||
1268 | /* | 1867 | ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); |
1269 | * The actual tail page has moved forward. | 1868 | if (ret) { |
1270 | */ | 1869 | /* |
1271 | if (tail < BUF_PAGE_SIZE) { | 1870 | * Nested commits always have zero deltas, so |
1272 | /* Mark the rest of the page with padding */ | 1871 | * just reread the time stamp |
1273 | event = __rb_page_index(tail_page, tail); | 1872 | */ |
1274 | kmemcheck_annotate_bitfield(event, bitfield); | 1873 | *ts = rb_time_stamp(buffer, cpu_buffer->cpu); |
1275 | rb_event_set_padding(event); | 1874 | next_page->page->time_stamp = *ts; |
1276 | } | 1875 | } |
1277 | 1876 | ||
1278 | /* Set the write back to the previous setting */ | 1877 | out_again: |
1279 | local_sub(length, &tail_page->write); | ||
1280 | 1878 | ||
1281 | /* | 1879 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1282 | * If this was a commit entry that failed, | ||
1283 | * increment that too | ||
1284 | */ | ||
1285 | if (tail_page == cpu_buffer->commit_page && | ||
1286 | tail == rb_commit_index(cpu_buffer)) { | ||
1287 | rb_set_commit_to_write(cpu_buffer); | ||
1288 | } | ||
1289 | |||
1290 | __raw_spin_unlock(&cpu_buffer->lock); | ||
1291 | local_irq_restore(flags); | ||
1292 | 1880 | ||
1293 | /* fail and let the caller try again */ | 1881 | /* fail and let the caller try again */ |
1294 | return ERR_PTR(-EAGAIN); | 1882 | return ERR_PTR(-EAGAIN); |
1295 | 1883 | ||
1296 | out_reset: | 1884 | out_reset: |
1297 | /* reset write */ | 1885 | /* reset write */ |
1298 | local_sub(length, &tail_page->write); | 1886 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1299 | 1887 | ||
1300 | if (likely(lock_taken)) | ||
1301 | __raw_spin_unlock(&cpu_buffer->lock); | ||
1302 | local_irq_restore(flags); | ||
1303 | return NULL; | 1888 | return NULL; |
1304 | } | 1889 | } |
1305 | 1890 | ||
@@ -1316,6 +1901,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1316 | barrier(); | 1901 | barrier(); |
1317 | tail_page = cpu_buffer->tail_page; | 1902 | tail_page = cpu_buffer->tail_page; |
1318 | write = local_add_return(length, &tail_page->write); | 1903 | write = local_add_return(length, &tail_page->write); |
1904 | |||
1905 | /* set write to only the index of the write */ | ||
1906 | write &= RB_WRITE_MASK; | ||
1319 | tail = write - length; | 1907 | tail = write - length; |
1320 | 1908 | ||
1321 | /* See if we shot pass the end of this buffer page */ | 1909 | /* See if we shot pass the end of this buffer page */ |
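With the write field packed, the reserve fast path in __rb_reserve_next becomes: claim the space with one atomic add, mask off the updater bits, and only then check whether the event crossed the page. A rough userspace rendering of those few lines (C11 atomics in place of local_t, assumed page size, illustrative names):

#include <stdatomic.h>

#define BUF_PAGE_SIZE  4080UL   /* assumed page payload size */
#define RB_WRITE_MASK  0xfffffUL

static atomic_ulong page_write; /* stands in for tail_page->write */

/* returns the offset of the reserved event, or -1 if it crossed the page */
static long reserve_sketch(unsigned long length)
{
        unsigned long write, tail;

        write = atomic_fetch_add(&page_write, length) + length;
        write &= RB_WRITE_MASK;         /* keep only the index */
        tail = write - length;

        if (write > BUF_PAGE_SIZE)      /* caller must move to the next page */
                return -1;

        return (long)tail;
}

int main(void)
{
        return reserve_sketch(32) == 0 ? 0 : 1; /* first event lands at offset 0 */
}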
@@ -1325,9 +1913,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1325 | 1913 | ||
1326 | /* We reserved something on the buffer */ | 1914 | /* We reserved something on the buffer */ |
1327 | 1915 | ||
1328 | if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) | ||
1329 | return NULL; | ||
1330 | |||
1331 | event = __rb_page_index(tail_page, tail); | 1916 | event = __rb_page_index(tail_page, tail); |
1332 | kmemcheck_annotate_bitfield(event, bitfield); | 1917 | kmemcheck_annotate_bitfield(event, bitfield); |
1333 | rb_update_event(event, type, length); | 1918 | rb_update_event(event, type, length); |
@@ -1337,11 +1922,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1337 | local_inc(&tail_page->entries); | 1922 | local_inc(&tail_page->entries); |
1338 | 1923 | ||
1339 | /* | 1924 | /* |
1340 | * If this is a commit and the tail is zero, then update | 1925 | * If this is the first commit on the page, then update |
1341 | * this page's time stamp. | 1926 | * its timestamp. |
1342 | */ | 1927 | */ |
1343 | if (!tail && rb_is_commit(cpu_buffer, event)) | 1928 | if (!tail) |
1344 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1929 | tail_page->page->time_stamp = *ts; |
1345 | 1930 | ||
1346 | return event; | 1931 | return event; |
1347 | } | 1932 | } |
@@ -1363,12 +1948,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1363 | bpage = cpu_buffer->tail_page; | 1948 | bpage = cpu_buffer->tail_page; |
1364 | 1949 | ||
1365 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | 1950 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { |
1951 | unsigned long write_mask = | ||
1952 | local_read(&bpage->write) & ~RB_WRITE_MASK; | ||
1366 | /* | 1953 | /* |
1367 | * This is on the tail page. It is possible that | 1954 | * This is on the tail page. It is possible that |
1368 | * a write could come in and move the tail page | 1955 | * a write could come in and move the tail page |
1369 | * and write to the next page. That is fine | 1956 | * and write to the next page. That is fine |
1370 | * because we just shorten what is on this page. | 1957 | * because we just shorten what is on this page. |
1371 | */ | 1958 | */ |
1959 | old_index += write_mask; | ||
1960 | new_index += write_mask; | ||
1372 | index = local_cmpxchg(&bpage->write, old_index, new_index); | 1961 | index = local_cmpxchg(&bpage->write, old_index, new_index); |
1373 | if (index == old_index) | 1962 | if (index == old_index) |
1374 | return 1; | 1963 | return 1; |
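The write field here packs two values into one word: the low bits (RB_WRITE_MASK) are the byte index into the page, while the upper bits hold a counter that changes when the writer moves on to a new page. That is why rb_try_to_discard() carries the upper bits into old_index and new_index before the cmpxchg: a stale index taken before a page change no longer matches the full value. Below is a minimal standalone sketch (not kernel code) of the packing only; the 20-bit split is an assumption for illustration.

/*
 * Standalone sketch of a write field with an embedded page counter.
 * The mask width is assumed; only the packing idea is shown.
 */
#include <stdio.h>

#define WRITE_MASK  0x000fffffUL        /* low bits: write index (assumed width) */
#define COUNT_STEP  (WRITE_MASK + 1)    /* a page change bumps the upper bits    */

int main(void)
{
    unsigned long write = 0;

    write += 64;                        /* reserve 64 bytes on this page */
    write += 32;                        /* reserve 32 more               */
    printf("index=%lu crossings=%lu\n",
           write & WRITE_MASK, write / COUNT_STEP);

    /* the writer moves to the next page: bump the counter, clear the index */
    write = (write & ~WRITE_MASK) + COUNT_STEP;
    printf("index=%lu crossings=%lu\n",
           write & WRITE_MASK, write / COUNT_STEP);

    return 0;
}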
@@ -1410,16 +1999,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1410 | return -EAGAIN; | 1999 | return -EAGAIN; |
1411 | 2000 | ||
1412 | /* Only a committed time event can update the write stamp */ | 2001 | /* Only a committed time event can update the write stamp */ |
1413 | if (rb_is_commit(cpu_buffer, event)) { | 2002 | if (rb_event_is_commit(cpu_buffer, event)) { |
1414 | /* | 2003 | /* |
1415 | * If this is the first on the page, then we need to | 2004 | * If this is the first on the page, then it was |
1416 | * update the page itself, and just put in a zero. | 2005 | * updated with the page itself. Try to discard it |
2006 | * and if we can't, just make it zero. | ||
1417 | */ | 2007 | */ |
1418 | if (rb_event_index(event)) { | 2008 | if (rb_event_index(event)) { |
1419 | event->time_delta = *delta & TS_MASK; | 2009 | event->time_delta = *delta & TS_MASK; |
1420 | event->array[0] = *delta >> TS_SHIFT; | 2010 | event->array[0] = *delta >> TS_SHIFT; |
1421 | } else { | 2011 | } else { |
1422 | cpu_buffer->commit_page->page->time_stamp = *ts; | ||
1423 | /* try to discard, since we do not need this */ | 2012 | /* try to discard, since we do not need this */ |
1424 | if (!rb_try_to_discard(cpu_buffer, event)) { | 2013 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1425 | /* nope, just zero it */ | 2014 | /* nope, just zero it */ |
@@ -1445,8 +2034,47 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1445 | return ret; | 2034 | return ret; |
1446 | } | 2035 | } |
1447 | 2036 | ||
2037 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
2038 | { | ||
2039 | local_inc(&cpu_buffer->committing); | ||
2040 | local_inc(&cpu_buffer->commits); | ||
2041 | } | ||
2042 | |||
2043 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
2044 | { | ||
2045 | unsigned long commits; | ||
2046 | |||
2047 | if (RB_WARN_ON(cpu_buffer, | ||
2048 | !local_read(&cpu_buffer->committing))) | ||
2049 | return; | ||
2050 | |||
2051 | again: | ||
2052 | commits = local_read(&cpu_buffer->commits); | ||
2053 | /* synchronize with interrupts */ | ||
2054 | barrier(); | ||
2055 | if (local_read(&cpu_buffer->committing) == 1) | ||
2056 | rb_set_commit_to_write(cpu_buffer); | ||
2057 | |||
2058 | local_dec(&cpu_buffer->committing); | ||
2059 | |||
2060 | /* synchronize with interrupts */ | ||
2061 | barrier(); | ||
2062 | |||
2063 | /* | ||
2064 | * Need to account for interrupts coming in between the | ||
2065 | * updating of the commit page and the clearing of the | ||
2066 | * committing counter. | ||
2067 | */ | ||
2068 | if (unlikely(local_read(&cpu_buffer->commits) != commits) && | ||
2069 | !local_read(&cpu_buffer->committing)) { | ||
2070 | local_inc(&cpu_buffer->committing); | ||
2071 | goto again; | ||
2072 | } | ||
2073 | } | ||
2074 | |||
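rb_start_commit()/rb_end_commit() track how deeply commits are nested: an interrupt can reserve an event while the interrupted context is still committing, and only the outermost level is allowed to move the commit page forward. The separate commits counter lets rb_end_commit() notice a reserve that slipped in between the commit-page update and the clearing of the committing counter. A minimal single-threaded sketch (not kernel code) of just the nesting rule, ignoring the race re-check:

/*
 * Standalone sketch of the commit nesting rule: only the outermost
 * end_commit() publishes.  Plain ints stand in for local_t counters.
 */
#include <stdio.h>

static int committing;   /* current commit nesting depth      */
static int commits;      /* total events reserved on this CPU */

static void start_commit(void)
{
    committing++;
    commits++;
}

static void end_commit(void)
{
    if (committing == 1)
        printf("outermost level: advance commit page (%d events so far)\n",
               commits);
    committing--;
}

int main(void)
{
    start_commit();      /* normal context reserves an event            */
    start_commit();      /*   an "interrupt" nests and reserves another */
    end_commit();        /*   inner end_commit(): must not advance      */
    end_commit();        /* outer end_commit(): advances the commit     */
    return 0;
}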
1448 | static struct ring_buffer_event * | 2075 | static struct ring_buffer_event * |
1449 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 2076 | rb_reserve_next_event(struct ring_buffer *buffer, |
2077 | struct ring_buffer_per_cpu *cpu_buffer, | ||
1450 | unsigned long length) | 2078 | unsigned long length) |
1451 | { | 2079 | { |
1452 | struct ring_buffer_event *event; | 2080 | struct ring_buffer_event *event; |
@@ -1454,6 +2082,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1454 | int commit = 0; | 2082 | int commit = 0; |
1455 | int nr_loops = 0; | 2083 | int nr_loops = 0; |
1456 | 2084 | ||
2085 | rb_start_commit(cpu_buffer); | ||
2086 | |||
2087 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
2088 | /* | ||
2089 | * Due to the ability to swap a cpu buffer from a buffer | ||
2090 | * it is possible it was swapped before we committed. | ||
2091 | * (committing stops a swap). We check for it here and | ||
2092 | * if it happened, we have to fail the write. | ||
2093 | */ | ||
2094 | barrier(); | ||
2095 | if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { | ||
2096 | local_dec(&cpu_buffer->committing); | ||
2097 | local_dec(&cpu_buffer->commits); | ||
2098 | return NULL; | ||
2099 | } | ||
2100 | #endif | ||
2101 | |||
1457 | length = rb_calculate_event_length(length); | 2102 | length = rb_calculate_event_length(length); |
1458 | again: | 2103 | again: |
1459 | /* | 2104 | /* |
@@ -1466,7 +2111,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1466 | * Bail! | 2111 | * Bail! |
1467 | */ | 2112 | */ |
1468 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 2113 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
1469 | return NULL; | 2114 | goto out_fail; |
1470 | 2115 | ||
1471 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 2116 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
1472 | 2117 | ||
@@ -1497,7 +2142,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1497 | 2142 | ||
1498 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2143 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); |
1499 | if (commit == -EBUSY) | 2144 | if (commit == -EBUSY) |
1500 | return NULL; | 2145 | goto out_fail; |
1501 | 2146 | ||
1502 | if (commit == -EAGAIN) | 2147 | if (commit == -EAGAIN) |
1503 | goto again; | 2148 | goto again; |
@@ -1511,30 +2156,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1511 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2156 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
1512 | goto again; | 2157 | goto again; |
1513 | 2158 | ||
1514 | if (!event) { | 2159 | if (!event) |
1515 | if (unlikely(commit)) | 2160 | goto out_fail; |
1516 | /* | ||
1517 | * Ouch! We needed a timestamp and it was commited. But | ||
1518 | * we didn't get our event reserved. | ||
1519 | */ | ||
1520 | rb_set_commit_to_write(cpu_buffer); | ||
1521 | return NULL; | ||
1522 | } | ||
1523 | 2161 | ||
1524 | /* | 2162 | if (!rb_event_is_commit(cpu_buffer, event)) |
1525 | * If the timestamp was commited, make the commit our entry | ||
1526 | * now so that we will update it when needed. | ||
1527 | */ | ||
1528 | if (unlikely(commit)) | ||
1529 | rb_set_commit_event(cpu_buffer, event); | ||
1530 | else if (!rb_is_commit(cpu_buffer, event)) | ||
1531 | delta = 0; | 2163 | delta = 0; |
1532 | 2164 | ||
1533 | event->time_delta = delta; | 2165 | event->time_delta = delta; |
1534 | 2166 | ||
1535 | return event; | 2167 | return event; |
2168 | |||
2169 | out_fail: | ||
2170 | rb_end_commit(cpu_buffer); | ||
2171 | return NULL; | ||
1536 | } | 2172 | } |
1537 | 2173 | ||
2174 | #ifdef CONFIG_TRACING | ||
2175 | |||
1538 | #define TRACE_RECURSIVE_DEPTH 16 | 2176 | #define TRACE_RECURSIVE_DEPTH 16 |
1539 | 2177 | ||
1540 | static int trace_recursive_lock(void) | 2178 | static int trace_recursive_lock(void) |
@@ -1565,6 +2203,13 @@ static void trace_recursive_unlock(void) | |||
1565 | current->trace_recursion--; | 2203 | current->trace_recursion--; |
1566 | } | 2204 | } |
1567 | 2205 | ||
2206 | #else | ||
2207 | |||
2208 | #define trace_recursive_lock() (0) | ||
2209 | #define trace_recursive_unlock() do { } while (0) | ||
2210 | |||
2211 | #endif | ||
2212 | |||
1568 | static DEFINE_PER_CPU(int, rb_need_resched); | 2213 | static DEFINE_PER_CPU(int, rb_need_resched); |
1569 | 2214 | ||
1570 | /** | 2215 | /** |
@@ -1614,7 +2259,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
1614 | if (length > BUF_MAX_DATA_SIZE) | 2259 | if (length > BUF_MAX_DATA_SIZE) |
1615 | goto out; | 2260 | goto out; |
1616 | 2261 | ||
1617 | event = rb_reserve_next_event(cpu_buffer, length); | 2262 | event = rb_reserve_next_event(buffer, cpu_buffer, length); |
1618 | if (!event) | 2263 | if (!event) |
1619 | goto out; | 2264 | goto out; |
1620 | 2265 | ||
@@ -1637,18 +2282,24 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
1637 | } | 2282 | } |
1638 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | 2283 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); |
1639 | 2284 | ||
2285 | static void | ||
2286 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
2287 | struct ring_buffer_event *event) | ||
2288 | { | ||
2289 | /* | ||
2290 | * The first event in the commit queue updates the | ||
2291 | * time stamp. | ||
2292 | */ | ||
2293 | if (rb_event_is_commit(cpu_buffer, event)) | ||
2294 | cpu_buffer->write_stamp += event->time_delta; | ||
2295 | } | ||
2296 | |||
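rb_update_write_stamp() keeps a running write_stamp by adding each committed event's time_delta, so an event only needs to store the small delta since the previous commit; when a delta is too large to fit, rb_add_time_stamp() above emits a time-extend event instead. A standalone sketch (not kernel code) of the delta arithmetic; the 27-bit delta width is an assumption for illustration.

/*
 * Standalone sketch of delta-based timestamps: writer and reader both
 * rebuild absolute time by summing per-event deltas.
 */
#include <stdint.h>
#include <stdio.h>

#define DELTA_MAX  ((1u << 27) - 1)     /* assumed delta field width */

int main(void)
{
    uint64_t clock[] = { 1000, 1003, 1010, 1500 };   /* raw clock reads */
    uint64_t write_stamp = clock[0];                 /* writer side     */
    uint64_t read_stamp  = clock[0];                 /* reader side     */

    for (int i = 1; i < 4; i++) {
        uint64_t delta = clock[i] - write_stamp;

        if (delta > DELTA_MAX) {
            /* the real buffer would insert a time-extend event here */
            printf("event %d: delta too large, would need a time extend\n", i);
        }

        write_stamp += delta;    /* what rb_update_write_stamp() models */
        read_stamp  += delta;    /* the reader accumulates the same way */
        printf("event %d: delta=%llu absolute=%llu\n", i,
               (unsigned long long)delta, (unsigned long long)read_stamp);
    }
    return 0;
}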
1640 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2297 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1641 | struct ring_buffer_event *event) | 2298 | struct ring_buffer_event *event) |
1642 | { | 2299 | { |
1643 | local_inc(&cpu_buffer->entries); | 2300 | local_inc(&cpu_buffer->entries); |
1644 | 2301 | rb_update_write_stamp(cpu_buffer, event); | |
1645 | /* Only process further if we own the commit */ | 2302 | rb_end_commit(cpu_buffer); |
1646 | if (!rb_is_commit(cpu_buffer, event)) | ||
1647 | return; | ||
1648 | |||
1649 | cpu_buffer->write_stamp += event->time_delta; | ||
1650 | |||
1651 | rb_set_commit_to_write(cpu_buffer); | ||
1652 | } | 2303 | } |
1653 | 2304 | ||
1654 | /** | 2305 | /** |
@@ -1694,32 +2345,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event) | |||
1694 | event->time_delta = 1; | 2345 | event->time_delta = 1; |
1695 | } | 2346 | } |
1696 | 2347 | ||
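rb_event_discard() leaves the event in place but marks it as padding with a non-zero time_delta, so a reader skips the payload yet still accounts for the time the event covered. A standalone sketch (not kernel code) of that header manipulation; the bitfield widths and the padding type value are assumptions for illustration.

/*
 * Standalone sketch of discarding an event header in place.
 */
#include <stdio.h>

enum { TYPE_PADDING = 29 };               /* assumed type_len value */

struct event_header {
    unsigned type_len   : 5;
    unsigned time_delta : 27;
};

static void event_discard(struct event_header *e)
{
    e->type_len = TYPE_PADDING;
    e->time_delta = 1;                    /* non-zero: discarded, not end-of-page */
}

int main(void)
{
    struct event_header e = { .type_len = 4, .time_delta = 12 };

    event_discard(&e);
    printf("type_len=%u time_delta=%u\n", e.type_len, e.time_delta);
    return 0;
}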
1697 | /** | 2348 | /* |
1698 | * ring_buffer_event_discard - discard any event in the ring buffer | 2349 | * Decrement the entries to the page that an event is on. |
1699 | * @event: the event to discard | 2350 | * The event does not even need to exist, only the pointer |
1700 | * | 2351 | * to the page it is on. This may only be called before the commit |
1701 | * Sometimes a event that is in the ring buffer needs to be ignored. | 2352 | * takes place. |
1702 | * This function lets the user discard an event in the ring buffer | ||
1703 | * and then that event will not be read later. | ||
1704 | * | ||
1705 | * Note, it is up to the user to be careful with this, and protect | ||
1706 | * against races. If the user discards an event that has been consumed | ||
1707 | * it is possible that it could corrupt the ring buffer. | ||
1708 | */ | 2353 | */ |
1709 | void ring_buffer_event_discard(struct ring_buffer_event *event) | 2354 | static inline void |
2355 | rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, | ||
2356 | struct ring_buffer_event *event) | ||
1710 | { | 2357 | { |
1711 | rb_event_discard(event); | 2358 | unsigned long addr = (unsigned long)event; |
2359 | struct buffer_page *bpage = cpu_buffer->commit_page; | ||
2360 | struct buffer_page *start; | ||
2361 | |||
2362 | addr &= PAGE_MASK; | ||
2363 | |||
2364 | /* Do the likely case first */ | ||
2365 | if (likely(bpage->page == (void *)addr)) { | ||
2366 | local_dec(&bpage->entries); | ||
2367 | return; | ||
2368 | } | ||
2369 | |||
2370 | /* | ||
2371 | * Because the commit page may be on the reader page we | ||
2372 | * start with the next page and check the end loop there. | ||
2373 | */ | ||
2374 | rb_inc_page(cpu_buffer, &bpage); | ||
2375 | start = bpage; | ||
2376 | do { | ||
2377 | if (bpage->page == (void *)addr) { | ||
2378 | local_dec(&bpage->entries); | ||
2379 | return; | ||
2380 | } | ||
2381 | rb_inc_page(cpu_buffer, &bpage); | ||
2382 | } while (bpage != start); | ||
2383 | |||
2384 | /* commit not part of this buffer?? */ | ||
2385 | RB_WARN_ON(cpu_buffer, 1); | ||
1712 | } | 2386 | } |
1713 | EXPORT_SYMBOL_GPL(ring_buffer_event_discard); | ||
1714 | 2387 | ||
1715 | /** | 2388 | /** |
1716 | * ring_buffer_commit_discard - discard an event that has not been committed | 2389 | * ring_buffer_commit_discard - discard an event that has not been committed |
1717 | * @buffer: the ring buffer | 2390 | * @buffer: the ring buffer |
1718 | * @event: non committed event to discard | 2391 | * @event: non committed event to discard |
1719 | * | 2392 | * |
1720 | * This is similar to ring_buffer_event_discard but must only be | 2393 | * Sometimes an event that is in the ring buffer needs to be ignored. |
1721 | * performed on an event that has not been committed yet. The difference | 2394 | * This function lets the user discard an event in the ring buffer |
1722 | * is that this will also try to free the event from the ring buffer | 2395 | * and then that event will not be read later. |
2396 | * | ||
2397 | * This function only works if it is called before the item has been | ||
2398 | * committed. It will try to free the event from the ring buffer | ||
1723 | * if another event has not been added behind it. | 2399 | * if another event has not been added behind it. |
1724 | * | 2400 | * |
1725 | * If another event has been added behind it, it will set the event | 2401 | * If another event has been added behind it, it will set the event |
@@ -1737,32 +2413,27 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1737 | /* The event is discarded regardless */ | 2413 | /* The event is discarded regardless */ |
1738 | rb_event_discard(event); | 2414 | rb_event_discard(event); |
1739 | 2415 | ||
2416 | cpu = smp_processor_id(); | ||
2417 | cpu_buffer = buffer->buffers[cpu]; | ||
2418 | |||
1740 | /* | 2419 | /* |
1741 | * This must only be called if the event has not been | 2420 | * This must only be called if the event has not been |
1742 | * committed yet. Thus we can assume that preemption | 2421 | * committed yet. Thus we can assume that preemption |
1743 | * is still disabled. | 2422 | * is still disabled. |
1744 | */ | 2423 | */ |
1745 | RB_WARN_ON(buffer, preemptible()); | 2424 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); |
1746 | 2425 | ||
1747 | cpu = smp_processor_id(); | 2426 | rb_decrement_entry(cpu_buffer, event); |
1748 | cpu_buffer = buffer->buffers[cpu]; | 2427 | if (rb_try_to_discard(cpu_buffer, event)) |
1749 | |||
1750 | if (!rb_try_to_discard(cpu_buffer, event)) | ||
1751 | goto out; | 2428 | goto out; |
1752 | 2429 | ||
1753 | /* | 2430 | /* |
1754 | * The commit is still visible to the reader, so we | 2431 | * The commit is still visible to the reader, so we |
1755 | * must increment entries. | 2432 | * must still update the timestamp. |
1756 | */ | 2433 | */ |
1757 | local_inc(&cpu_buffer->entries); | 2434 | rb_update_write_stamp(cpu_buffer, event); |
1758 | out: | 2435 | out: |
1759 | /* | 2436 | rb_end_commit(cpu_buffer); |
1760 | * If a write came in and pushed the tail page | ||
1761 | * we still need to update the commit pointer | ||
1762 | * if we were the commit. | ||
1763 | */ | ||
1764 | if (rb_is_commit(cpu_buffer, event)) | ||
1765 | rb_set_commit_to_write(cpu_buffer); | ||
1766 | 2437 | ||
1767 | trace_recursive_unlock(); | 2438 | trace_recursive_unlock(); |
1768 | 2439 | ||
@@ -1821,7 +2492,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
1821 | if (length > BUF_MAX_DATA_SIZE) | 2492 | if (length > BUF_MAX_DATA_SIZE) |
1822 | goto out; | 2493 | goto out; |
1823 | 2494 | ||
1824 | event = rb_reserve_next_event(cpu_buffer, length); | 2495 | event = rb_reserve_next_event(buffer, cpu_buffer, length); |
1825 | if (!event) | 2496 | if (!event) |
1826 | goto out; | 2497 | goto out; |
1827 | 2498 | ||
@@ -1842,9 +2513,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write); | |||
1842 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 2513 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
1843 | { | 2514 | { |
1844 | struct buffer_page *reader = cpu_buffer->reader_page; | 2515 | struct buffer_page *reader = cpu_buffer->reader_page; |
1845 | struct buffer_page *head = cpu_buffer->head_page; | 2516 | struct buffer_page *head = rb_set_head_page(cpu_buffer); |
1846 | struct buffer_page *commit = cpu_buffer->commit_page; | 2517 | struct buffer_page *commit = cpu_buffer->commit_page; |
1847 | 2518 | ||
2519 | /* In case of error, head will be NULL */ | ||
2520 | if (unlikely(!head)) | ||
2521 | return 1; | ||
2522 | |||
1848 | return reader->read == rb_page_commit(reader) && | 2523 | return reader->read == rb_page_commit(reader) && |
1849 | (commit == reader || | 2524 | (commit == reader || |
1850 | (commit == head && | 2525 | (commit == head && |
@@ -1935,7 +2610,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | |||
1935 | return 0; | 2610 | return 0; |
1936 | 2611 | ||
1937 | cpu_buffer = buffer->buffers[cpu]; | 2612 | cpu_buffer = buffer->buffers[cpu]; |
1938 | ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) | 2613 | ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) |
1939 | - cpu_buffer->read; | 2614 | - cpu_buffer->read; |
1940 | 2615 | ||
1941 | return ret; | 2616 | return ret; |
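So the per-cpu entry count reported here is "committed, minus overwritten, minus already read". A worked example with made-up numbers:

/* Worked example of the accounting used just above. */
#include <stdio.h>

int main(void)
{
    unsigned long entries = 1200;    /* events committed on this CPU     */
    unsigned long overrun = 150;     /* events overwritten by the writer */
    unsigned long read    = 900;     /* events already consumed          */

    printf("pending = %lu\n", entries - overrun - read);   /* prints 150 */
    return 0;
}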
@@ -1956,33 +2631,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
1956 | return 0; | 2631 | return 0; |
1957 | 2632 | ||
1958 | cpu_buffer = buffer->buffers[cpu]; | 2633 | cpu_buffer = buffer->buffers[cpu]; |
1959 | ret = cpu_buffer->overrun; | 2634 | ret = local_read(&cpu_buffer->overrun); |
1960 | 2635 | ||
1961 | return ret; | 2636 | return ret; |
1962 | } | 2637 | } |
1963 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 2638 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
1964 | 2639 | ||
1965 | /** | 2640 | /** |
1966 | * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped | ||
1967 | * @buffer: The ring buffer | ||
1968 | * @cpu: The per CPU buffer to get the number of overruns from | ||
1969 | */ | ||
1970 | unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) | ||
1971 | { | ||
1972 | struct ring_buffer_per_cpu *cpu_buffer; | ||
1973 | unsigned long ret; | ||
1974 | |||
1975 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
1976 | return 0; | ||
1977 | |||
1978 | cpu_buffer = buffer->buffers[cpu]; | ||
1979 | ret = cpu_buffer->nmi_dropped; | ||
1980 | |||
1981 | return ret; | ||
1982 | } | ||
1983 | EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); | ||
1984 | |||
1985 | /** | ||
1986 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | 2641 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits |
1987 | * @buffer: The ring buffer | 2642 | * @buffer: The ring buffer |
1988 | * @cpu: The per CPU buffer to get the number of overruns from | 2643 | * @cpu: The per CPU buffer to get the number of overruns from |
@@ -1997,7 +2652,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
1997 | return 0; | 2652 | return 0; |
1998 | 2653 | ||
1999 | cpu_buffer = buffer->buffers[cpu]; | 2654 | cpu_buffer = buffer->buffers[cpu]; |
2000 | ret = cpu_buffer->commit_overrun; | 2655 | ret = local_read(&cpu_buffer->commit_overrun); |
2001 | 2656 | ||
2002 | return ret; | 2657 | return ret; |
2003 | } | 2658 | } |
@@ -2020,7 +2675,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2020 | for_each_buffer_cpu(buffer, cpu) { | 2675 | for_each_buffer_cpu(buffer, cpu) { |
2021 | cpu_buffer = buffer->buffers[cpu]; | 2676 | cpu_buffer = buffer->buffers[cpu]; |
2022 | entries += (local_read(&cpu_buffer->entries) - | 2677 | entries += (local_read(&cpu_buffer->entries) - |
2023 | cpu_buffer->overrun) - cpu_buffer->read; | 2678 | local_read(&cpu_buffer->overrun)) - cpu_buffer->read; |
2024 | } | 2679 | } |
2025 | 2680 | ||
2026 | return entries; | 2681 | return entries; |
@@ -2043,7 +2698,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer) | |||
2043 | /* if you care about this being correct, lock the buffer */ | 2698 | /* if you care about this being correct, lock the buffer */ |
2044 | for_each_buffer_cpu(buffer, cpu) { | 2699 | for_each_buffer_cpu(buffer, cpu) { |
2045 | cpu_buffer = buffer->buffers[cpu]; | 2700 | cpu_buffer = buffer->buffers[cpu]; |
2046 | overruns += cpu_buffer->overrun; | 2701 | overruns += local_read(&cpu_buffer->overrun); |
2047 | } | 2702 | } |
2048 | 2703 | ||
2049 | return overruns; | 2704 | return overruns; |
@@ -2056,8 +2711,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
2056 | 2711 | ||
2057 | /* Iterator usage is expected to have record disabled */ | 2712 | /* Iterator usage is expected to have record disabled */ |
2058 | if (list_empty(&cpu_buffer->reader_page->list)) { | 2713 | if (list_empty(&cpu_buffer->reader_page->list)) { |
2059 | iter->head_page = cpu_buffer->head_page; | 2714 | iter->head_page = rb_set_head_page(cpu_buffer); |
2060 | iter->head = cpu_buffer->head_page->read; | 2715 | if (unlikely(!iter->head_page)) |
2716 | return; | ||
2717 | iter->head = iter->head_page->read; | ||
2061 | } else { | 2718 | } else { |
2062 | iter->head_page = cpu_buffer->reader_page; | 2719 | iter->head_page = cpu_buffer->reader_page; |
2063 | iter->head = cpu_buffer->reader_page->read; | 2720 | iter->head = cpu_buffer->reader_page->read; |
@@ -2174,6 +2831,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2174 | struct buffer_page *reader = NULL; | 2831 | struct buffer_page *reader = NULL; |
2175 | unsigned long flags; | 2832 | unsigned long flags; |
2176 | int nr_loops = 0; | 2833 | int nr_loops = 0; |
2834 | int ret; | ||
2177 | 2835 | ||
2178 | local_irq_save(flags); | 2836 | local_irq_save(flags); |
2179 | __raw_spin_lock(&cpu_buffer->lock); | 2837 | __raw_spin_lock(&cpu_buffer->lock); |
@@ -2207,30 +2865,56 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2207 | goto out; | 2865 | goto out; |
2208 | 2866 | ||
2209 | /* | 2867 | /* |
2210 | * Splice the empty reader page into the list around the head. | ||
2211 | * Reset the reader page to size zero. | 2868 | * Reset the reader page to size zero. |
2212 | */ | 2869 | */ |
2870 | local_set(&cpu_buffer->reader_page->write, 0); | ||
2871 | local_set(&cpu_buffer->reader_page->entries, 0); | ||
2872 | local_set(&cpu_buffer->reader_page->page->commit, 0); | ||
2213 | 2873 | ||
2214 | reader = cpu_buffer->head_page; | 2874 | spin: |
2875 | /* | ||
2876 | * Splice the empty reader page into the list around the head. | ||
2877 | */ | ||
2878 | reader = rb_set_head_page(cpu_buffer); | ||
2215 | cpu_buffer->reader_page->list.next = reader->list.next; | 2879 | cpu_buffer->reader_page->list.next = reader->list.next; |
2216 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 2880 | cpu_buffer->reader_page->list.prev = reader->list.prev; |
2217 | 2881 | ||
2218 | local_set(&cpu_buffer->reader_page->write, 0); | 2882 | /* |
2219 | local_set(&cpu_buffer->reader_page->entries, 0); | 2883 | * cpu_buffer->pages just needs to point to the buffer, it |
2220 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2884 | * has no specific buffer page to point to. Let's move it out |
2885 | * of our way so we don't accidentally swap it. | ||
2886 | */ | ||
2887 | cpu_buffer->pages = reader->list.prev; | ||
2221 | 2888 | ||
2222 | /* Make the reader page now replace the head */ | 2889 | /* The reader page will be pointing to the new head */ |
2223 | reader->list.prev->next = &cpu_buffer->reader_page->list; | 2890 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); |
2224 | reader->list.next->prev = &cpu_buffer->reader_page->list; | 2891 | |
2892 | /* | ||
2893 | * Here's the tricky part. | ||
2894 | * | ||
2895 | * We need to move the pointer past the header page. | ||
2896 | * But we can only do that if a writer is not currently | ||
2897 | * moving it. The page before the header page has the | ||
2898 | * flag bit '1' set if it is pointing to the page we want. | ||
2899 | * But if the writer is in the process of moving it | ||
2900 | * then it will be '2' or already moved '0'. | ||
2901 | */ | ||
2902 | |||
2903 | ret = rb_head_page_replace(reader, cpu_buffer->reader_page); | ||
2225 | 2904 | ||
2226 | /* | 2905 | /* |
2227 | * If the tail is on the reader, then we must set the head | 2906 | * If we did not convert it, then we must try again. |
2228 | * to the inserted page, otherwise we set it one before. | ||
2229 | */ | 2907 | */ |
2230 | cpu_buffer->head_page = cpu_buffer->reader_page; | 2908 | if (!ret) |
2909 | goto spin; | ||
2231 | 2910 | ||
2232 | if (cpu_buffer->commit_page != reader) | 2911 | /* |
2233 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | 2912 | * Yeah! We succeeded in replacing the page. |
2913 | * | ||
2914 | * Now make the new head point back to the reader page. | ||
2915 | */ | ||
2916 | reader->list.next->prev = &cpu_buffer->reader_page->list; | ||
2917 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | ||
2234 | 2918 | ||
2235 | /* Finally update the reader page to the new head */ | 2919 | /* Finally update the reader page to the new head */ |
2236 | cpu_buffer->reader_page = reader; | 2920 | cpu_buffer->reader_page = reader; |
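The comment above describes flag values kept in the low bits of the list pointer of the page before the head: the reader may only claim the head while that flag is still '1', which maps naturally onto a compare-and-exchange that fails if a writer has changed or cleared the flag. A standalone userspace sketch (not kernel code) of tagging an aligned pointer with a flag and swapping it atomically; the flag values and alignment are assumptions for illustration.

/*
 * Standalone sketch of pointer tagging plus compare-and-exchange.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RB_PAGE_HEAD  1UL                /* assumed flag values */
#define RB_FLAG_MASK  3UL

static atomic_uintptr_t next_ptr;        /* models list.next of the page before head */

/* Replace the head page with the reader's spare page, but only if the
 * pointer still carries the HEAD flag, i.e. no writer moved it away. */
static int head_page_replace(void *old_head, void *new_head)
{
    uintptr_t expect = (uintptr_t)old_head | RB_PAGE_HEAD;
    uintptr_t desire = (uintptr_t)new_head | RB_PAGE_HEAD;

    return atomic_compare_exchange_strong(&next_ptr, &expect, desire);
}

int main(void)
{
    void *head   = aligned_alloc(64, 64);   /* alignment keeps the low bits clear */
    void *reader = aligned_alloc(64, 64);

    atomic_store(&next_ptr, (uintptr_t)head | RB_PAGE_HEAD);

    printf("replaced=%d\n", head_page_replace(head, reader));
    printf("new head=%p flag=%lu\n",
           (void *)(atomic_load(&next_ptr) & ~RB_FLAG_MASK),
           (unsigned long)(atomic_load(&next_ptr) & RB_FLAG_MASK));

    free(head);
    free(reader);
    return 0;
}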
@@ -2259,8 +2943,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
2259 | 2943 | ||
2260 | event = rb_reader_event(cpu_buffer); | 2944 | event = rb_reader_event(cpu_buffer); |
2261 | 2945 | ||
2262 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 2946 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
2263 | || rb_discarded_event(event)) | ||
2264 | cpu_buffer->read++; | 2947 | cpu_buffer->read++; |
2265 | 2948 | ||
2266 | rb_update_read_stamp(cpu_buffer, event); | 2949 | rb_update_read_stamp(cpu_buffer, event); |
@@ -2351,7 +3034,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2351 | * the box. Return the padding, and we will release | 3034 | * the box. Return the padding, and we will release |
2352 | * the current locks, and try again. | 3035 | * the current locks, and try again. |
2353 | */ | 3036 | */ |
2354 | rb_advance_reader(cpu_buffer); | ||
2355 | return event; | 3037 | return event; |
2356 | 3038 | ||
2357 | case RINGBUF_TYPE_TIME_EXTEND: | 3039 | case RINGBUF_TYPE_TIME_EXTEND: |
@@ -2446,6 +3128,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2446 | } | 3128 | } |
2447 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); | 3129 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); |
2448 | 3130 | ||
3131 | static inline int rb_ok_to_lock(void) | ||
3132 | { | ||
3133 | /* | ||
3134 | * If an NMI die dumps out the content of the ring buffer, | ||
3135 | * do not grab locks. We also permanently disable the ring | ||
3136 | * buffer. A one-time deal is all you get from reading | ||
3137 | * the ring buffer from an NMI. | ||
3138 | */ | ||
3139 | if (likely(!in_nmi())) | ||
3140 | return 1; | ||
3141 | |||
3142 | tracing_off_permanent(); | ||
3143 | return 0; | ||
3144 | } | ||
3145 | |||
2449 | /** | 3146 | /** |
2450 | * ring_buffer_peek - peek at the next event to be read | 3147 | * ring_buffer_peek - peek at the next event to be read |
2451 | * @buffer: The ring buffer to read | 3148 | * @buffer: The ring buffer to read |
@@ -2461,19 +3158,25 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2461 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 3158 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2462 | struct ring_buffer_event *event; | 3159 | struct ring_buffer_event *event; |
2463 | unsigned long flags; | 3160 | unsigned long flags; |
3161 | int dolock; | ||
2464 | 3162 | ||
2465 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3163 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2466 | return NULL; | 3164 | return NULL; |
2467 | 3165 | ||
3166 | dolock = rb_ok_to_lock(); | ||
2468 | again: | 3167 | again: |
2469 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3168 | local_irq_save(flags); |
3169 | if (dolock) | ||
3170 | spin_lock(&cpu_buffer->reader_lock); | ||
2470 | event = rb_buffer_peek(buffer, cpu, ts); | 3171 | event = rb_buffer_peek(buffer, cpu, ts); |
2471 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3172 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3173 | rb_advance_reader(cpu_buffer); | ||
3174 | if (dolock) | ||
3175 | spin_unlock(&cpu_buffer->reader_lock); | ||
3176 | local_irq_restore(flags); | ||
2472 | 3177 | ||
2473 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3178 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
2474 | cpu_relax(); | ||
2475 | goto again; | 3179 | goto again; |
2476 | } | ||
2477 | 3180 | ||
2478 | return event; | 3181 | return event; |
2479 | } | 3182 | } |
@@ -2498,10 +3201,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2498 | event = rb_iter_peek(iter, ts); | 3201 | event = rb_iter_peek(iter, ts); |
2499 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3202 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2500 | 3203 | ||
2501 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3204 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
2502 | cpu_relax(); | ||
2503 | goto again; | 3205 | goto again; |
2504 | } | ||
2505 | 3206 | ||
2506 | return event; | 3207 | return event; |
2507 | } | 3208 | } |
@@ -2520,6 +3221,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2520 | struct ring_buffer_per_cpu *cpu_buffer; | 3221 | struct ring_buffer_per_cpu *cpu_buffer; |
2521 | struct ring_buffer_event *event = NULL; | 3222 | struct ring_buffer_event *event = NULL; |
2522 | unsigned long flags; | 3223 | unsigned long flags; |
3224 | int dolock; | ||
3225 | |||
3226 | dolock = rb_ok_to_lock(); | ||
2523 | 3227 | ||
2524 | again: | 3228 | again: |
2525 | /* might be called in atomic */ | 3229 | /* might be called in atomic */ |
@@ -2529,24 +3233,23 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2529 | goto out; | 3233 | goto out; |
2530 | 3234 | ||
2531 | cpu_buffer = buffer->buffers[cpu]; | 3235 | cpu_buffer = buffer->buffers[cpu]; |
2532 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3236 | local_irq_save(flags); |
3237 | if (dolock) | ||
3238 | spin_lock(&cpu_buffer->reader_lock); | ||
2533 | 3239 | ||
2534 | event = rb_buffer_peek(buffer, cpu, ts); | 3240 | event = rb_buffer_peek(buffer, cpu, ts); |
2535 | if (!event) | 3241 | if (event) |
2536 | goto out_unlock; | 3242 | rb_advance_reader(cpu_buffer); |
2537 | |||
2538 | rb_advance_reader(cpu_buffer); | ||
2539 | 3243 | ||
2540 | out_unlock: | 3244 | if (dolock) |
2541 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3245 | spin_unlock(&cpu_buffer->reader_lock); |
3246 | local_irq_restore(flags); | ||
2542 | 3247 | ||
2543 | out: | 3248 | out: |
2544 | preempt_enable(); | 3249 | preempt_enable(); |
2545 | 3250 | ||
2546 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3251 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
2547 | cpu_relax(); | ||
2548 | goto again; | 3252 | goto again; |
2549 | } | ||
2550 | 3253 | ||
2551 | return event; | 3254 | return event; |
2552 | } | 3255 | } |
@@ -2626,21 +3329,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2626 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3329 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
2627 | unsigned long flags; | 3330 | unsigned long flags; |
2628 | 3331 | ||
2629 | again: | ||
2630 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3332 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3333 | again: | ||
2631 | event = rb_iter_peek(iter, ts); | 3334 | event = rb_iter_peek(iter, ts); |
2632 | if (!event) | 3335 | if (!event) |
2633 | goto out; | 3336 | goto out; |
2634 | 3337 | ||
3338 | if (event->type_len == RINGBUF_TYPE_PADDING) | ||
3339 | goto again; | ||
3340 | |||
2635 | rb_advance_iter(iter); | 3341 | rb_advance_iter(iter); |
2636 | out: | 3342 | out: |
2637 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3343 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2638 | 3344 | ||
2639 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | ||
2640 | cpu_relax(); | ||
2641 | goto again; | ||
2642 | } | ||
2643 | |||
2644 | return event; | 3345 | return event; |
2645 | } | 3346 | } |
2646 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 3347 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
@@ -2658,8 +3359,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size); | |||
2658 | static void | 3359 | static void |
2659 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | 3360 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) |
2660 | { | 3361 | { |
3362 | rb_head_page_deactivate(cpu_buffer); | ||
3363 | |||
2661 | cpu_buffer->head_page | 3364 | cpu_buffer->head_page |
2662 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 3365 | = list_entry(cpu_buffer->pages, struct buffer_page, list); |
2663 | local_set(&cpu_buffer->head_page->write, 0); | 3366 | local_set(&cpu_buffer->head_page->write, 0); |
2664 | local_set(&cpu_buffer->head_page->entries, 0); | 3367 | local_set(&cpu_buffer->head_page->entries, 0); |
2665 | local_set(&cpu_buffer->head_page->page->commit, 0); | 3368 | local_set(&cpu_buffer->head_page->page->commit, 0); |
@@ -2675,14 +3378,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2675 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 3378 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
2676 | cpu_buffer->reader_page->read = 0; | 3379 | cpu_buffer->reader_page->read = 0; |
2677 | 3380 | ||
2678 | cpu_buffer->nmi_dropped = 0; | 3381 | local_set(&cpu_buffer->commit_overrun, 0); |
2679 | cpu_buffer->commit_overrun = 0; | 3382 | local_set(&cpu_buffer->overrun, 0); |
2680 | cpu_buffer->overrun = 0; | ||
2681 | cpu_buffer->read = 0; | ||
2682 | local_set(&cpu_buffer->entries, 0); | 3383 | local_set(&cpu_buffer->entries, 0); |
3384 | local_set(&cpu_buffer->committing, 0); | ||
3385 | local_set(&cpu_buffer->commits, 0); | ||
3386 | cpu_buffer->read = 0; | ||
2683 | 3387 | ||
2684 | cpu_buffer->write_stamp = 0; | 3388 | cpu_buffer->write_stamp = 0; |
2685 | cpu_buffer->read_stamp = 0; | 3389 | cpu_buffer->read_stamp = 0; |
3390 | |||
3391 | rb_head_page_activate(cpu_buffer); | ||
2686 | } | 3392 | } |
2687 | 3393 | ||
2688 | /** | 3394 | /** |
@@ -2702,12 +3408,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
2702 | 3408 | ||
2703 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3409 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2704 | 3410 | ||
3411 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) | ||
3412 | goto out; | ||
3413 | |||
2705 | __raw_spin_lock(&cpu_buffer->lock); | 3414 | __raw_spin_lock(&cpu_buffer->lock); |
2706 | 3415 | ||
2707 | rb_reset_cpu(cpu_buffer); | 3416 | rb_reset_cpu(cpu_buffer); |
2708 | 3417 | ||
2709 | __raw_spin_unlock(&cpu_buffer->lock); | 3418 | __raw_spin_unlock(&cpu_buffer->lock); |
2710 | 3419 | ||
3420 | out: | ||
2711 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3421 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2712 | 3422 | ||
2713 | atomic_dec(&cpu_buffer->record_disabled); | 3423 | atomic_dec(&cpu_buffer->record_disabled); |
@@ -2734,12 +3444,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); | |||
2734 | int ring_buffer_empty(struct ring_buffer *buffer) | 3444 | int ring_buffer_empty(struct ring_buffer *buffer) |
2735 | { | 3445 | { |
2736 | struct ring_buffer_per_cpu *cpu_buffer; | 3446 | struct ring_buffer_per_cpu *cpu_buffer; |
3447 | unsigned long flags; | ||
3448 | int dolock; | ||
2737 | int cpu; | 3449 | int cpu; |
3450 | int ret; | ||
3451 | |||
3452 | dolock = rb_ok_to_lock(); | ||
2738 | 3453 | ||
2739 | /* yes this is racy, but if you don't like the race, lock the buffer */ | 3454 | /* yes this is racy, but if you don't like the race, lock the buffer */ |
2740 | for_each_buffer_cpu(buffer, cpu) { | 3455 | for_each_buffer_cpu(buffer, cpu) { |
2741 | cpu_buffer = buffer->buffers[cpu]; | 3456 | cpu_buffer = buffer->buffers[cpu]; |
2742 | if (!rb_per_cpu_empty(cpu_buffer)) | 3457 | local_irq_save(flags); |
3458 | if (dolock) | ||
3459 | spin_lock(&cpu_buffer->reader_lock); | ||
3460 | ret = rb_per_cpu_empty(cpu_buffer); | ||
3461 | if (dolock) | ||
3462 | spin_unlock(&cpu_buffer->reader_lock); | ||
3463 | local_irq_restore(flags); | ||
3464 | |||
3465 | if (!ret) | ||
2743 | return 0; | 3466 | return 0; |
2744 | } | 3467 | } |
2745 | 3468 | ||
@@ -2755,19 +3478,29 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); | |||
2755 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 3478 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
2756 | { | 3479 | { |
2757 | struct ring_buffer_per_cpu *cpu_buffer; | 3480 | struct ring_buffer_per_cpu *cpu_buffer; |
3481 | unsigned long flags; | ||
3482 | int dolock; | ||
2758 | int ret; | 3483 | int ret; |
2759 | 3484 | ||
2760 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3485 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2761 | return 1; | 3486 | return 1; |
2762 | 3487 | ||
3488 | dolock = rb_ok_to_lock(); | ||
3489 | |||
2763 | cpu_buffer = buffer->buffers[cpu]; | 3490 | cpu_buffer = buffer->buffers[cpu]; |
3491 | local_irq_save(flags); | ||
3492 | if (dolock) | ||
3493 | spin_lock(&cpu_buffer->reader_lock); | ||
2764 | ret = rb_per_cpu_empty(cpu_buffer); | 3494 | ret = rb_per_cpu_empty(cpu_buffer); |
2765 | 3495 | if (dolock) | |
3496 | spin_unlock(&cpu_buffer->reader_lock); | ||
3497 | local_irq_restore(flags); | ||
2766 | 3498 | ||
2767 | return ret; | 3499 | return ret; |
2768 | } | 3500 | } |
2769 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 3501 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); |
2770 | 3502 | ||
3503 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
2771 | /** | 3504 | /** |
2772 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers | 3505 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers |
2773 | * @buffer_a: One buffer to swap with | 3506 | * @buffer_a: One buffer to swap with |
@@ -2822,20 +3555,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
2822 | atomic_inc(&cpu_buffer_a->record_disabled); | 3555 | atomic_inc(&cpu_buffer_a->record_disabled); |
2823 | atomic_inc(&cpu_buffer_b->record_disabled); | 3556 | atomic_inc(&cpu_buffer_b->record_disabled); |
2824 | 3557 | ||
3558 | ret = -EBUSY; | ||
3559 | if (local_read(&cpu_buffer_a->committing)) | ||
3560 | goto out_dec; | ||
3561 | if (local_read(&cpu_buffer_b->committing)) | ||
3562 | goto out_dec; | ||
3563 | |||
2825 | buffer_a->buffers[cpu] = cpu_buffer_b; | 3564 | buffer_a->buffers[cpu] = cpu_buffer_b; |
2826 | buffer_b->buffers[cpu] = cpu_buffer_a; | 3565 | buffer_b->buffers[cpu] = cpu_buffer_a; |
2827 | 3566 | ||
2828 | cpu_buffer_b->buffer = buffer_a; | 3567 | cpu_buffer_b->buffer = buffer_a; |
2829 | cpu_buffer_a->buffer = buffer_b; | 3568 | cpu_buffer_a->buffer = buffer_b; |
2830 | 3569 | ||
3570 | ret = 0; | ||
3571 | |||
3572 | out_dec: | ||
2831 | atomic_dec(&cpu_buffer_a->record_disabled); | 3573 | atomic_dec(&cpu_buffer_a->record_disabled); |
2832 | atomic_dec(&cpu_buffer_b->record_disabled); | 3574 | atomic_dec(&cpu_buffer_b->record_disabled); |
2833 | |||
2834 | ret = 0; | ||
2835 | out: | 3575 | out: |
2836 | return ret; | 3576 | return ret; |
2837 | } | 3577 | } |
2838 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 3578 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
3579 | #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ | ||
2839 | 3580 | ||
2840 | /** | 3581 | /** |
2841 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 3582 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
@@ -3008,7 +3749,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3008 | read = 0; | 3749 | read = 0; |
3009 | } else { | 3750 | } else { |
3010 | /* update the entry counter */ | 3751 | /* update the entry counter */ |
3011 | cpu_buffer->read += local_read(&reader->entries); | 3752 | cpu_buffer->read += rb_page_entries(reader); |
3012 | 3753 | ||
3013 | /* swap the pages */ | 3754 | /* swap the pages */ |
3014 | rb_init_page(bpage); | 3755 | rb_init_page(bpage); |
@@ -3029,6 +3770,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3029 | } | 3770 | } |
3030 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | 3771 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); |
3031 | 3772 | ||
3773 | #ifdef CONFIG_TRACING | ||
3032 | static ssize_t | 3774 | static ssize_t |
3033 | rb_simple_read(struct file *filp, char __user *ubuf, | 3775 | rb_simple_read(struct file *filp, char __user *ubuf, |
3034 | size_t cnt, loff_t *ppos) | 3776 | size_t cnt, loff_t *ppos) |
@@ -3096,6 +3838,7 @@ static __init int rb_init_debugfs(void) | |||
3096 | } | 3838 | } |
3097 | 3839 | ||
3098 | fs_initcall(rb_init_debugfs); | 3840 | fs_initcall(rb_init_debugfs); |
3841 | #endif | ||
3099 | 3842 | ||
3100 | #ifdef CONFIG_HOTPLUG_CPU | 3843 | #ifdef CONFIG_HOTPLUG_CPU |
3101 | static int rb_cpu_notify(struct notifier_block *self, | 3844 | static int rb_cpu_notify(struct notifier_block *self, |
@@ -3108,7 +3851,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3108 | switch (action) { | 3851 | switch (action) { |
3109 | case CPU_UP_PREPARE: | 3852 | case CPU_UP_PREPARE: |
3110 | case CPU_UP_PREPARE_FROZEN: | 3853 | case CPU_UP_PREPARE_FROZEN: |
3111 | if (cpu_isset(cpu, *buffer->cpumask)) | 3854 | if (cpumask_test_cpu(cpu, buffer->cpumask)) |
3112 | return NOTIFY_OK; | 3855 | return NOTIFY_OK; |
3113 | 3856 | ||
3114 | buffer->buffers[cpu] = | 3857 | buffer->buffers[cpu] = |
@@ -3119,7 +3862,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3119 | return NOTIFY_OK; | 3862 | return NOTIFY_OK; |
3120 | } | 3863 | } |
3121 | smp_wmb(); | 3864 | smp_wmb(); |
3122 | cpu_set(cpu, *buffer->cpumask); | 3865 | cpumask_set_cpu(cpu, buffer->cpumask); |
3123 | break; | 3866 | break; |
3124 | case CPU_DOWN_PREPARE: | 3867 | case CPU_DOWN_PREPARE: |
3125 | case CPU_DOWN_PREPARE_FROZEN: | 3868 | case CPU_DOWN_PREPARE_FROZEN: |