Diffstat (limited to 'kernel/trace/ring_buffer.c')
 -rw-r--r--  kernel/trace/ring_buffer.c | 1161
 1 file changed, 912 insertions(+), 249 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 04dac2638258..3ffa502fb243 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
| @@ -201,8 +201,6 @@ int tracing_is_on(void) | |||
| 201 | } | 201 | } | 
| 202 | EXPORT_SYMBOL_GPL(tracing_is_on); | 202 | EXPORT_SYMBOL_GPL(tracing_is_on); | 
| 203 | 203 | ||
| 204 | #include "trace.h" | ||
| 205 | |||
| 206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 204 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 
| 207 | #define RB_ALIGNMENT 4U | 205 | #define RB_ALIGNMENT 4U | 
| 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 206 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 
| @@ -218,17 +216,12 @@ enum { | |||
| 218 | 216 | ||
| 219 | static inline int rb_null_event(struct ring_buffer_event *event) | 217 | static inline int rb_null_event(struct ring_buffer_event *event) | 
| 220 | { | 218 | { | 
| 221 | return event->type_len == RINGBUF_TYPE_PADDING | 219 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 
| 222 | && event->time_delta == 0; | ||
| 223 | } | ||
| 224 | |||
| 225 | static inline int rb_discarded_event(struct ring_buffer_event *event) | ||
| 226 | { | ||
| 227 | return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; | ||
| 228 | } | 220 | } | 
| 229 | 221 | ||
| 230 | static void rb_event_set_padding(struct ring_buffer_event *event) | 222 | static void rb_event_set_padding(struct ring_buffer_event *event) | 
| 231 | { | 223 | { | 
| 224 | /* padding has a NULL time_delta */ | ||
| 232 | event->type_len = RINGBUF_TYPE_PADDING; | 225 | event->type_len = RINGBUF_TYPE_PADDING; | 
| 233 | event->time_delta = 0; | 226 | event->time_delta = 0; | 
| 234 | } | 227 | } | 
| @@ -322,6 +315,14 @@ struct buffer_data_page { | |||
| 322 | unsigned char data[]; /* data of buffer page */ | 315 | unsigned char data[]; /* data of buffer page */ | 
| 323 | }; | 316 | }; | 
| 324 | 317 | ||
| 318 | /* | ||
| 319 | * Note, the buffer_page list must be first. The buffer pages | ||
| 320 | * are allocated in cache lines, which means that each buffer | ||
| 321 | * page will be at the beginning of a cache line, and thus | ||
| 322 | * the least significant bits will be zero. We use this to | ||
| 323 | * add flags in the list struct pointers, to make the ring buffer | ||
| 324 | * lockless. | ||
| 325 | */ | ||
| 325 | struct buffer_page { | 326 | struct buffer_page { | 
| 326 | struct list_head list; /* list of buffer pages */ | 327 | struct list_head list; /* list of buffer pages */ | 
| 327 | local_t write; /* index for next write */ | 328 | local_t write; /* index for next write */ | 
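The comment added above is the load-bearing assumption of the whole lockless scheme: buffer pages are allocated on cache-line boundaries, so the two least significant bits of any page address are zero and can carry state. A stand-alone sketch of that encoding (helper names are illustrative, not part of the patch; struct list_head is redeclared only to keep the sketch self-contained):

struct list_head { struct list_head *next, *prev; }; /* as in <linux/list.h> */

#define RB_FLAG_MASK 3UL  /* two low bits are free on cache-line-aligned pages */

/* Strip the flag bits to recover the real pointer. */
static inline struct list_head *ptr_strip(struct list_head *p)
{
        return (struct list_head *)((unsigned long)p & ~RB_FLAG_MASK);
}

/* Read the flag bits riding in the pointer. */
static inline unsigned long ptr_flags(struct list_head *p)
{
        return (unsigned long)p & RB_FLAG_MASK;
}

/* Re-attach a flag value to an aligned pointer. */
static inline struct list_head *ptr_with_flags(struct list_head *p,
                                               unsigned long flags)
{
        return (struct list_head *)(((unsigned long)p & ~RB_FLAG_MASK) | flags);
}

The rb_list_head() helper added later in this patch is exactly the strip operation above.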
| @@ -330,6 +331,21 @@ struct buffer_page { | |||
| 330 | struct buffer_data_page *page; /* Actual data page */ | 331 | struct buffer_data_page *page; /* Actual data page */ | 
| 331 | }; | 332 | }; | 
| 332 | 333 | ||
| 334 | /* | ||
| 335 | * The buffer page counters, write and entries, must be reset | ||
| 336 | * atomically when crossing page boundaries. To synchronize this | ||
| 337 | * update, two counters are inserted into the number. One is | ||
| 338 | * the actual counter for the write position or count on the page. | ||
| 339 | * | ||
| 340 | * The other is a counter of updaters. Before an update happens | ||
| 341 | * the update partition of the counter is incremented. This will | ||
| 342 | * allow the updater to update the counter atomically. | ||
| 343 | * | ||
| 344 | * The counter is 20 bits, and the state data is 12. | ||
| 345 | */ | ||
| 346 | #define RB_WRITE_MASK 0xfffff | ||
| 347 | #define RB_WRITE_INTCNT (1 << 20) | ||
| 348 | |||
| 333 | static void rb_init_page(struct buffer_data_page *bpage) | 349 | static void rb_init_page(struct buffer_data_page *bpage) | 
| 334 | { | 350 | { | 
| 335 | local_set(&bpage->commit, 0); | 351 | local_set(&bpage->commit, 0); | 
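The new RB_WRITE_MASK/RB_WRITE_INTCNT constants split each local_t into two fields: the low 20 bits are the real write index or entry count, and everything above them counts in-flight updaters, so a nested writer's increment invalidates a concurrent cmpxchg-based reset. A rough, single-threaded illustration of the packing (plain unsigned long standing in for local_t):

#include <stdio.h>

#define RB_WRITE_MASK   0xfffffUL        /* low 20 bits: the real counter */
#define RB_WRITE_INTCNT (1UL << 20)      /* one "updater" tick above them */

int main(void)
{
        unsigned long write = 456;               /* current write index */

        /* a nested updater announces itself before touching the page */
        write += RB_WRITE_INTCNT;

        printf("index = %lu, updaters = %lu\n",
               write & RB_WRITE_MASK,            /* still 456 */
               write >> 20);                     /* now 1     */
        return 0;
}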
| @@ -403,21 +419,20 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
| 403 | struct ring_buffer_per_cpu { | 419 | struct ring_buffer_per_cpu { | 
| 404 | int cpu; | 420 | int cpu; | 
| 405 | struct ring_buffer *buffer; | 421 | struct ring_buffer *buffer; | 
| 406 | spinlock_t reader_lock; /* serialize readers */ | 422 | spinlock_t reader_lock; /* serialize readers */ | 
| 407 | raw_spinlock_t lock; | 423 | raw_spinlock_t lock; | 
| 408 | struct lock_class_key lock_key; | 424 | struct lock_class_key lock_key; | 
| 409 | struct list_head pages; | 425 | struct list_head *pages; | 
| 410 | struct buffer_page *head_page; /* read from head */ | 426 | struct buffer_page *head_page; /* read from head */ | 
| 411 | struct buffer_page *tail_page; /* write to tail */ | 427 | struct buffer_page *tail_page; /* write to tail */ | 
| 412 | struct buffer_page *commit_page; /* committed pages */ | 428 | struct buffer_page *commit_page; /* committed pages */ | 
| 413 | struct buffer_page *reader_page; | 429 | struct buffer_page *reader_page; | 
| 414 | unsigned long nmi_dropped; | 430 | local_t commit_overrun; | 
| 415 | unsigned long commit_overrun; | 431 | local_t overrun; | 
| 416 | unsigned long overrun; | ||
| 417 | unsigned long read; | ||
| 418 | local_t entries; | 432 | local_t entries; | 
| 419 | local_t committing; | 433 | local_t committing; | 
| 420 | local_t commits; | 434 | local_t commits; | 
| 435 | unsigned long read; | ||
| 421 | u64 write_stamp; | 436 | u64 write_stamp; | 
| 422 | u64 read_stamp; | 437 | u64 read_stamp; | 
| 423 | atomic_t record_disabled; | 438 | atomic_t record_disabled; | 
| @@ -450,20 +465,25 @@ struct ring_buffer_iter { | |||
| 450 | }; | 465 | }; | 
| 451 | 466 | ||
| 452 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ | 467 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ | 
| 453 | #define RB_WARN_ON(buffer, cond) \ | 468 | #define RB_WARN_ON(b, cond) \ | 
| 454 | ({ \ | 469 | ({ \ | 
| 455 | int _____ret = unlikely(cond); \ | 470 | int _____ret = unlikely(cond); \ | 
| 456 | if (_____ret) { \ | 471 | if (_____ret) { \ | 
| 457 | atomic_inc(&buffer->record_disabled); \ | 472 | if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ | 
| 458 | WARN_ON(1); \ | 473 | struct ring_buffer_per_cpu *__b = \ | 
| 459 | } \ | 474 | (void *)b; \ | 
| 460 | _____ret; \ | 475 | atomic_inc(&__b->buffer->record_disabled); \ | 
| 476 | } else \ | ||
| 477 | atomic_inc(&b->record_disabled); \ | ||
| 478 | WARN_ON(1); \ | ||
| 479 | } \ | ||
| 480 | _____ret; \ | ||
| 461 | }) | 481 | }) | 
| 462 | 482 | ||
| 463 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 483 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 
| 464 | #define DEBUG_SHIFT 0 | 484 | #define DEBUG_SHIFT 0 | 
| 465 | 485 | ||
| 466 | static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) | 486 | static inline u64 rb_time_stamp(struct ring_buffer *buffer) | 
| 467 | { | 487 | { | 
| 468 | /* shift to debug/test normalization and TIME_EXTENTS */ | 488 | /* shift to debug/test normalization and TIME_EXTENTS */ | 
| 469 | return buffer->clock() << DEBUG_SHIFT; | 489 | return buffer->clock() << DEBUG_SHIFT; | 
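The reworked RB_WARN_ON() now accepts either a struct ring_buffer or a struct ring_buffer_per_cpu and resolves the right record_disabled counter at compile time; in the kernel, __same_type() is essentially GCC's __builtin_types_compatible_p(). A cut-down sketch of the same dispatch pattern, with invented stand-in types that both reach a "disabled" counter, directly or via a parent:

struct outer { int disabled; };
struct inner { struct outer *parent; };

#define SAME_TYPE(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))

#define DISABLE_ON(b)                                           \
        do {                                                    \
                if (SAME_TYPE(*(b), struct inner)) {            \
                        struct inner *__i = (void *)(b);        \
                        __i->parent->disabled++;                \
                } else {                                        \
                        struct outer *__o = (void *)(b);        \
                        __o->disabled++;                        \
                }                                               \
        } while (0)

static struct outer o;
static struct inner i = { .parent = &o };

static void example(void)
{
        DISABLE_ON(&o);  /* compile-time picks the outer branch */
        DISABLE_ON(&i);  /* inner branch bumps o.disabled via the parent */
}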
| @@ -474,7 +494,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | |||
| 474 | u64 time; | 494 | u64 time; | 
| 475 | 495 | ||
| 476 | preempt_disable_notrace(); | 496 | preempt_disable_notrace(); | 
| 477 | time = rb_time_stamp(buffer, cpu); | 497 | time = rb_time_stamp(buffer); | 
| 478 | preempt_enable_no_resched_notrace(); | 498 | preempt_enable_no_resched_notrace(); | 
| 479 | 499 | ||
| 480 | return time; | 500 | return time; | 
| @@ -489,6 +509,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | |||
| 489 | } | 509 | } | 
| 490 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | 510 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | 
| 491 | 511 | ||
| 512 | /* | ||
| 513 | * Making the ring buffer lockless makes things tricky. | ||
| 514 | * Although writes only happen on the CPU that they are on, | ||
| 515 | * and they only need to worry about interrupts. Reads can | ||
| 516 | * happen on any CPU. | ||
| 517 | * | ||
| 518 | * The reader page is always off the ring buffer, but when the | ||
| 519 | * reader finishes with a page, it needs to swap its page with | ||
| 520 | * a new one from the buffer. The reader needs to take from | ||
| 521 | * the head (writes go to the tail). But if a writer is in overwrite | ||
| 522 | * mode and wraps, it must push the head page forward. | ||
| 523 | * | ||
| 524 | * Here lies the problem. | ||
| 525 | * | ||
| 526 | * The reader must be careful to replace only the head page, and | ||
| 527 | * not another one. As described at the top of the file in the | ||
| 528 | * ASCII art, the reader sets its old page to point to the next | ||
| 529 | * page after head. It then sets the page after head to point to | ||
| 530 | * the old reader page. But if the writer moves the head page | ||
| 531 | * during this operation, the reader could end up with the tail. | ||
| 532 | * | ||
| 533 | * We use cmpxchg to help prevent this race. We also do something | ||
| 534 | * special with the page before head. We set the LSB to 1. | ||
| 535 | * | ||
| 536 | * When the writer must push the page forward, it will clear the | ||
| 537 | * bit that points to the head page, move the head, and then set | ||
| 538 | * the bit that points to the new head page. | ||
| 539 | * | ||
| 540 | * We also don't want an interrupt coming in and moving the head | ||
| 541 | * page on another writer. Thus we use the second LSB to catch | ||
| 542 | * that too. Thus: | ||
| 543 | * | ||
| 544 | * head->list->prev->next bit 1 bit 0 | ||
| 545 | * ------- ------- | ||
| 546 | * Normal page 0 0 | ||
| 547 | * Points to head page 0 1 | ||
| 548 | * New head page 1 0 | ||
| 549 | * | ||
| 550 | * Note we can not trust the prev pointer of the head page, because: | ||
| 551 | * | ||
| 552 | * +----+ +-----+ +-----+ | ||
| 553 | * | |------>| T |---X--->| N | | ||
| 554 | * | |<------| | | | | ||
| 555 | * +----+ +-----+ +-----+ | ||
| 556 | * ^ ^ | | ||
| 557 | * | +-----+ | | | ||
| 558 | * +----------| R |----------+ | | ||
| 559 | * | |<-----------+ | ||
| 560 | * +-----+ | ||
| 561 | * | ||
| 562 | * Key: ---X--> HEAD flag set in pointer | ||
| 563 | * T Tail page | ||
| 564 | * R Reader page | ||
| 565 | * N Next page | ||
| 566 | * | ||
| 567 | * (see __rb_reserve_next() to see where this happens) | ||
| 568 | * | ||
| 569 | * What the above shows is that the reader just swapped out | ||
| 570 | * the reader page with a page in the buffer, but before it | ||
| 571 | * could make the new header point back to the new page added | ||
| 572 | * it was preempted by a writer. The writer moved forward onto | ||
| 573 | * the new page added by the reader and is about to move forward | ||
| 574 | * again. | ||
| 575 | * | ||
| 576 | * You can see, it is legitimate for the previous pointer of | ||
| 577 | * the head (or any page) not to point back to itself. But only | ||
| 578 | * temporarially. | ||
| 579 | */ | ||
| 580 | |||
| 581 | #define RB_PAGE_NORMAL 0UL | ||
| 582 | #define RB_PAGE_HEAD 1UL | ||
| 583 | #define RB_PAGE_UPDATE 2UL | ||
| 584 | |||
| 585 | |||
| 586 | #define RB_FLAG_MASK 3UL | ||
| 587 | |||
| 588 | /* PAGE_MOVED is not part of the mask */ | ||
| 589 | #define RB_PAGE_MOVED 4UL | ||
| 590 | |||
| 591 | /* | ||
| 592 | * rb_list_head - remove any bit | ||
| 593 | */ | ||
| 594 | static struct list_head *rb_list_head(struct list_head *list) | ||
| 595 | { | ||
| 596 | unsigned long val = (unsigned long)list; | ||
| 597 | |||
| 598 | return (struct list_head *)(val & ~RB_FLAG_MASK); | ||
| 599 | } | ||
| 600 | |||
| 601 | /* | ||
| 602 | * rb_is_head_page - test if the given page is the head page | ||
| 603 | * | ||
| 604 | * Because the reader may move the head_page pointer, we can | ||
| 605 | * not trust what the head page is (it may be pointing to | ||
| 606 | * the reader page). But if the next page is a header page, | ||
| 607 | * its flags will be non zero. | ||
| 608 | */ | ||
| 609 | static int inline | ||
| 610 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 611 | struct buffer_page *page, struct list_head *list) | ||
| 612 | { | ||
| 613 | unsigned long val; | ||
| 614 | |||
| 615 | val = (unsigned long)list->next; | ||
| 616 | |||
| 617 | if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) | ||
| 618 | return RB_PAGE_MOVED; | ||
| 619 | |||
| 620 | return val & RB_FLAG_MASK; | ||
| 621 | } | ||
| 622 | |||
| 623 | /* | ||
| 624 | * rb_is_reader_page | ||
| 625 | * | ||
| 626 | * The unique thing about the reader page, is that, if the | ||
| 627 | * writer is ever on it, the previous pointer never points | ||
| 628 | * back to the reader page. | ||
| 629 | */ | ||
| 630 | static int rb_is_reader_page(struct buffer_page *page) | ||
| 631 | { | ||
| 632 | struct list_head *list = page->list.prev; | ||
| 633 | |||
| 634 | return rb_list_head(list->next) != &page->list; | ||
| 635 | } | ||
| 636 | |||
| 637 | /* | ||
| 638 | * rb_set_list_to_head - set a list_head to be pointing to head. | ||
| 639 | */ | ||
| 640 | static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 641 | struct list_head *list) | ||
| 642 | { | ||
| 643 | unsigned long *ptr; | ||
| 644 | |||
| 645 | ptr = (unsigned long *)&list->next; | ||
| 646 | *ptr |= RB_PAGE_HEAD; | ||
| 647 | *ptr &= ~RB_PAGE_UPDATE; | ||
| 648 | } | ||
| 649 | |||
| 650 | /* | ||
| 651 | * rb_head_page_activate - sets up head page | ||
| 652 | */ | ||
| 653 | static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) | ||
| 654 | { | ||
| 655 | struct buffer_page *head; | ||
| 656 | |||
| 657 | head = cpu_buffer->head_page; | ||
| 658 | if (!head) | ||
| 659 | return; | ||
| 660 | |||
| 661 | /* | ||
| 662 | * Set the previous list pointer to have the HEAD flag. | ||
| 663 | */ | ||
| 664 | rb_set_list_to_head(cpu_buffer, head->list.prev); | ||
| 665 | } | ||
| 666 | |||
| 667 | static void rb_list_head_clear(struct list_head *list) | ||
| 668 | { | ||
| 669 | unsigned long *ptr = (unsigned long *)&list->next; | ||
| 670 | |||
| 671 | *ptr &= ~RB_FLAG_MASK; | ||
| 672 | } | ||
| 673 | |||
| 674 | /* | ||
| 675 | * rb_head_page_dactivate - clears head page ptr (for free list) | ||
| 676 | */ | ||
| 677 | static void | ||
| 678 | rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) | ||
| 679 | { | ||
| 680 | struct list_head *hd; | ||
| 681 | |||
| 682 | /* Go through the whole list and clear any pointers found. */ | ||
| 683 | rb_list_head_clear(cpu_buffer->pages); | ||
| 684 | |||
| 685 | list_for_each(hd, cpu_buffer->pages) | ||
| 686 | rb_list_head_clear(hd); | ||
| 687 | } | ||
| 688 | |||
| 689 | static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 690 | struct buffer_page *head, | ||
| 691 | struct buffer_page *prev, | ||
| 692 | int old_flag, int new_flag) | ||
| 693 | { | ||
| 694 | struct list_head *list; | ||
| 695 | unsigned long val = (unsigned long)&head->list; | ||
| 696 | unsigned long ret; | ||
| 697 | |||
| 698 | list = &prev->list; | ||
| 699 | |||
| 700 | val &= ~RB_FLAG_MASK; | ||
| 701 | |||
| 702 | ret = cmpxchg((unsigned long *)&list->next, | ||
| 703 | val | old_flag, val | new_flag); | ||
| 704 | |||
| 705 | /* check if the reader took the page */ | ||
| 706 | if ((ret & ~RB_FLAG_MASK) != val) | ||
| 707 | return RB_PAGE_MOVED; | ||
| 708 | |||
| 709 | return ret & RB_FLAG_MASK; | ||
| 710 | } | ||
| 711 | |||
| 712 | static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 713 | struct buffer_page *head, | ||
| 714 | struct buffer_page *prev, | ||
| 715 | int old_flag) | ||
| 716 | { | ||
| 717 | return rb_head_page_set(cpu_buffer, head, prev, | ||
| 718 | old_flag, RB_PAGE_UPDATE); | ||
| 719 | } | ||
| 720 | |||
| 721 | static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 722 | struct buffer_page *head, | ||
| 723 | struct buffer_page *prev, | ||
| 724 | int old_flag) | ||
| 725 | { | ||
| 726 | return rb_head_page_set(cpu_buffer, head, prev, | ||
| 727 | old_flag, RB_PAGE_HEAD); | ||
| 728 | } | ||
| 729 | |||
| 730 | static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 731 | struct buffer_page *head, | ||
| 732 | struct buffer_page *prev, | ||
| 733 | int old_flag) | ||
| 734 | { | ||
| 735 | return rb_head_page_set(cpu_buffer, head, prev, | ||
| 736 | old_flag, RB_PAGE_NORMAL); | ||
| 737 | } | ||
| 738 | |||
| 739 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 740 | struct buffer_page **bpage) | ||
| 741 | { | ||
| 742 | struct list_head *p = rb_list_head((*bpage)->list.next); | ||
| 743 | |||
| 744 | *bpage = list_entry(p, struct buffer_page, list); | ||
| 745 | } | ||
| 746 | |||
| 747 | static struct buffer_page * | ||
| 748 | rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) | ||
| 749 | { | ||
| 750 | struct buffer_page *head; | ||
| 751 | struct buffer_page *page; | ||
| 752 | struct list_head *list; | ||
| 753 | int i; | ||
| 754 | |||
| 755 | if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) | ||
| 756 | return NULL; | ||
| 757 | |||
| 758 | /* sanity check */ | ||
| 759 | list = cpu_buffer->pages; | ||
| 760 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) | ||
| 761 | return NULL; | ||
| 762 | |||
| 763 | page = head = cpu_buffer->head_page; | ||
| 764 | /* | ||
| 765 | * It is possible that the writer moves the header behind | ||
| 766 | * where we started, and we miss in one loop. | ||
| 767 | * A second loop should grab the header, but we'll do | ||
| 768 | * three loops just because I'm paranoid. | ||
| 769 | */ | ||
| 770 | for (i = 0; i < 3; i++) { | ||
| 771 | do { | ||
| 772 | if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { | ||
| 773 | cpu_buffer->head_page = page; | ||
| 774 | return page; | ||
| 775 | } | ||
| 776 | rb_inc_page(cpu_buffer, &page); | ||
| 777 | } while (page != head); | ||
| 778 | } | ||
| 779 | |||
| 780 | RB_WARN_ON(cpu_buffer, 1); | ||
| 781 | |||
| 782 | return NULL; | ||
| 783 | } | ||
| 784 | |||
| 785 | static int rb_head_page_replace(struct buffer_page *old, | ||
| 786 | struct buffer_page *new) | ||
| 787 | { | ||
| 788 | unsigned long *ptr = (unsigned long *)&old->list.prev->next; | ||
| 789 | unsigned long val; | ||
| 790 | unsigned long ret; | ||
| 791 | |||
| 792 | val = *ptr & ~RB_FLAG_MASK; | ||
| 793 | val |= RB_PAGE_HEAD; | ||
| 794 | |||
| 795 | ret = cmpxchg(ptr, val, (unsigned long)&new->list); | ||
| 796 | |||
| 797 | return ret == val; | ||
| 798 | } | ||
| 799 | |||
| 800 | /* | ||
| 801 | * rb_tail_page_update - move the tail page forward | ||
| 802 | * | ||
| 803 | * Returns 1 if moved tail page, 0 if someone else did. | ||
| 804 | */ | ||
| 805 | static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 806 | struct buffer_page *tail_page, | ||
| 807 | struct buffer_page *next_page) | ||
| 808 | { | ||
| 809 | struct buffer_page *old_tail; | ||
| 810 | unsigned long old_entries; | ||
| 811 | unsigned long old_write; | ||
| 812 | int ret = 0; | ||
| 813 | |||
| 814 | /* | ||
| 815 | * The tail page now needs to be moved forward. | ||
| 816 | * | ||
| 817 | * We need to reset the tail page, but without messing | ||
| 818 | * with possible erasing of data brought in by interrupts | ||
| 819 | * that have moved the tail page and are currently on it. | ||
| 820 | * | ||
| 821 | * We add a counter to the write field to denote this. | ||
| 822 | */ | ||
| 823 | old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); | ||
| 824 | old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); | ||
| 825 | |||
| 826 | /* | ||
| 827 | * Just make sure we have seen our old_write and synchronize | ||
| 828 | * with any interrupts that come in. | ||
| 829 | */ | ||
| 830 | barrier(); | ||
| 831 | |||
| 832 | /* | ||
| 833 | * If the tail page is still the same as what we think | ||
| 834 | * it is, then it is up to us to update the tail | ||
| 835 | * pointer. | ||
| 836 | */ | ||
| 837 | if (tail_page == cpu_buffer->tail_page) { | ||
| 838 | /* Zero the write counter */ | ||
| 839 | unsigned long val = old_write & ~RB_WRITE_MASK; | ||
| 840 | unsigned long eval = old_entries & ~RB_WRITE_MASK; | ||
| 841 | |||
| 842 | /* | ||
| 843 | * This will only succeed if an interrupt did | ||
| 844 | * not come in and change it. In which case, we | ||
| 845 | * do not want to modify it. | ||
| 846 | * | ||
| 847 | * We add (void) to let the compiler know that we do not care | ||
| 848 | * about the return value of these functions. We use the | ||
| 849 | * cmpxchg to only update if an interrupt did not already | ||
| 850 | * do it for us. If the cmpxchg fails, we don't care. | ||
| 851 | */ | ||
| 852 | (void)local_cmpxchg(&next_page->write, old_write, val); | ||
| 853 | (void)local_cmpxchg(&next_page->entries, old_entries, eval); | ||
| 854 | |||
| 855 | /* | ||
| 856 | * No need to worry about races with clearing out the commit. | ||
| 857 | * it only can increment when a commit takes place. But that | ||
| 858 | * only happens in the outer most nested commit. | ||
| 859 | */ | ||
| 860 | local_set(&next_page->page->commit, 0); | ||
| 861 | |||
| 862 | old_tail = cmpxchg(&cpu_buffer->tail_page, | ||
| 863 | tail_page, next_page); | ||
| 864 | |||
| 865 | if (old_tail == tail_page) | ||
| 866 | ret = 1; | ||
| 867 | } | ||
| 868 | |||
| 869 | return ret; | ||
| 870 | } | ||
| 871 | |||
| 872 | static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 873 | struct buffer_page *bpage) | ||
| 874 | { | ||
| 875 | unsigned long val = (unsigned long)bpage; | ||
| 876 | |||
| 877 | if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) | ||
| 878 | return 1; | ||
| 879 | |||
| 880 | return 0; | ||
| 881 | } | ||
| 882 | |||
| 883 | /** | ||
| 884 | * rb_check_list - make sure a pointer to a list has the last bits zero | ||
| 885 | */ | ||
| 886 | static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 887 | struct list_head *list) | ||
| 888 | { | ||
| 889 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) | ||
| 890 | return 1; | ||
| 891 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) | ||
| 892 | return 1; | ||
| 893 | return 0; | ||
| 894 | } | ||
| 895 | |||
| 492 | /** | 896 | /** | 
| 493 | * check_pages - integrity check of buffer pages | 897 | * check_pages - integrity check of buffer pages | 
| 494 | * @cpu_buffer: CPU buffer with pages to test | 898 | * @cpu_buffer: CPU buffer with pages to test | 
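All of the head-page helpers added above reduce to one primitive: atomically flip the flag bits in the prev->next pointer that designates the head page, without disturbing the pointer itself, and report RB_PAGE_MOVED if a reader swapped the page away in the meantime. A compressed, stand-alone sketch of that cmpxchg pattern, mirroring rb_head_page_set() (here __sync_val_compare_and_swap stands in for the kernel's cmpxchg()):

#define RB_PAGE_NORMAL 0UL
#define RB_PAGE_HEAD   1UL
#define RB_PAGE_UPDATE 2UL
#define RB_FLAG_MASK   3UL
#define RB_PAGE_MOVED  4UL   /* not stored: "the page itself was replaced" */

/*
 * Atomically move the flag carried in *slot from old_flag to new_flag,
 * but only while the pointer part still refers to expected_ptr.
 * Returns the flag actually found, or RB_PAGE_MOVED if the pointer
 * changed underneath us (a reader swapped in its own page).
 */
static unsigned long flag_transition(unsigned long *slot,
                                     unsigned long expected_ptr,
                                     unsigned long old_flag,
                                     unsigned long new_flag)
{
        unsigned long val = expected_ptr & ~RB_FLAG_MASK;
        unsigned long ret;

        ret = __sync_val_compare_and_swap(slot, val | old_flag, val | new_flag);

        if ((ret & ~RB_FLAG_MASK) != val)
                return RB_PAGE_MOVED;      /* caller must re-read and retry */

        return ret & RB_FLAG_MASK;         /* the flag that was in place */
}

rb_head_page_set_update(), _set_head() and _set_normal() in the patch are this call with a fixed new_flag.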
| @@ -498,14 +902,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | |||
| 498 | */ | 902 | */ | 
| 499 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 903 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 
| 500 | { | 904 | { | 
| 501 | struct list_head *head = &cpu_buffer->pages; | 905 | struct list_head *head = cpu_buffer->pages; | 
| 502 | struct buffer_page *bpage, *tmp; | 906 | struct buffer_page *bpage, *tmp; | 
| 503 | 907 | ||
| 908 | rb_head_page_deactivate(cpu_buffer); | ||
| 909 | |||
| 504 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) | 910 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) | 
| 505 | return -1; | 911 | return -1; | 
| 506 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) | 912 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) | 
| 507 | return -1; | 913 | return -1; | 
| 508 | 914 | ||
| 915 | if (rb_check_list(cpu_buffer, head)) | ||
| 916 | return -1; | ||
| 917 | |||
| 509 | list_for_each_entry_safe(bpage, tmp, head, list) { | 918 | list_for_each_entry_safe(bpage, tmp, head, list) { | 
| 510 | if (RB_WARN_ON(cpu_buffer, | 919 | if (RB_WARN_ON(cpu_buffer, | 
| 511 | bpage->list.next->prev != &bpage->list)) | 920 | bpage->list.next->prev != &bpage->list)) | 
| @@ -513,25 +922,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 513 | if (RB_WARN_ON(cpu_buffer, | 922 | if (RB_WARN_ON(cpu_buffer, | 
| 514 | bpage->list.prev->next != &bpage->list)) | 923 | bpage->list.prev->next != &bpage->list)) | 
| 515 | return -1; | 924 | return -1; | 
| 925 | if (rb_check_list(cpu_buffer, &bpage->list)) | ||
| 926 | return -1; | ||
| 516 | } | 927 | } | 
| 517 | 928 | ||
| 929 | rb_head_page_activate(cpu_buffer); | ||
| 930 | |||
| 518 | return 0; | 931 | return 0; | 
| 519 | } | 932 | } | 
| 520 | 933 | ||
| 521 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | 934 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | 
| 522 | unsigned nr_pages) | 935 | unsigned nr_pages) | 
| 523 | { | 936 | { | 
| 524 | struct list_head *head = &cpu_buffer->pages; | ||
| 525 | struct buffer_page *bpage, *tmp; | 937 | struct buffer_page *bpage, *tmp; | 
| 526 | unsigned long addr; | 938 | unsigned long addr; | 
| 527 | LIST_HEAD(pages); | 939 | LIST_HEAD(pages); | 
| 528 | unsigned i; | 940 | unsigned i; | 
| 529 | 941 | ||
| 942 | WARN_ON(!nr_pages); | ||
| 943 | |||
| 530 | for (i = 0; i < nr_pages; i++) { | 944 | for (i = 0; i < nr_pages; i++) { | 
| 531 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 945 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 
| 532 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 946 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 
| 533 | if (!bpage) | 947 | if (!bpage) | 
| 534 | goto free_pages; | 948 | goto free_pages; | 
| 949 | |||
| 950 | rb_check_bpage(cpu_buffer, bpage); | ||
| 951 | |||
| 535 | list_add(&bpage->list, &pages); | 952 | list_add(&bpage->list, &pages); | 
| 536 | 953 | ||
| 537 | addr = __get_free_page(GFP_KERNEL); | 954 | addr = __get_free_page(GFP_KERNEL); | 
| @@ -541,7 +958,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 541 | rb_init_page(bpage->page); | 958 | rb_init_page(bpage->page); | 
| 542 | } | 959 | } | 
| 543 | 960 | ||
| 544 | list_splice(&pages, head); | 961 | /* | 
| 962 | * The ring buffer page list is a circular list that does not | ||
| 963 | * start and end with a list head. All page list items point to | ||
| 964 | * other pages. | ||
| 965 | */ | ||
| 966 | cpu_buffer->pages = pages.next; | ||
| 967 | list_del(&pages); | ||
| 545 | 968 | ||
| 546 | rb_check_pages(cpu_buffer); | 969 | rb_check_pages(cpu_buffer); | 
| 547 | 970 | ||
| @@ -573,13 +996,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
| 573 | spin_lock_init(&cpu_buffer->reader_lock); | 996 | spin_lock_init(&cpu_buffer->reader_lock); | 
| 574 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | 997 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | 
| 575 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 998 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 
| 576 | INIT_LIST_HEAD(&cpu_buffer->pages); | ||
| 577 | 999 | ||
| 578 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1000 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 
| 579 | GFP_KERNEL, cpu_to_node(cpu)); | 1001 | GFP_KERNEL, cpu_to_node(cpu)); | 
| 580 | if (!bpage) | 1002 | if (!bpage) | 
| 581 | goto fail_free_buffer; | 1003 | goto fail_free_buffer; | 
| 582 | 1004 | ||
| 1005 | rb_check_bpage(cpu_buffer, bpage); | ||
| 1006 | |||
| 583 | cpu_buffer->reader_page = bpage; | 1007 | cpu_buffer->reader_page = bpage; | 
| 584 | addr = __get_free_page(GFP_KERNEL); | 1008 | addr = __get_free_page(GFP_KERNEL); | 
| 585 | if (!addr) | 1009 | if (!addr) | 
| @@ -594,9 +1018,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
| 594 | goto fail_free_reader; | 1018 | goto fail_free_reader; | 
| 595 | 1019 | ||
| 596 | cpu_buffer->head_page | 1020 | cpu_buffer->head_page | 
| 597 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 1021 | = list_entry(cpu_buffer->pages, struct buffer_page, list); | 
| 598 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; | 1022 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; | 
| 599 | 1023 | ||
| 1024 | rb_head_page_activate(cpu_buffer); | ||
| 1025 | |||
| 600 | return cpu_buffer; | 1026 | return cpu_buffer; | 
| 601 | 1027 | ||
| 602 | fail_free_reader: | 1028 | fail_free_reader: | 
| @@ -609,15 +1035,22 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
| 609 | 1035 | ||
| 610 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | 1036 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | 
| 611 | { | 1037 | { | 
| 612 | struct list_head *head = &cpu_buffer->pages; | 1038 | struct list_head *head = cpu_buffer->pages; | 
| 613 | struct buffer_page *bpage, *tmp; | 1039 | struct buffer_page *bpage, *tmp; | 
| 614 | 1040 | ||
| 615 | free_buffer_page(cpu_buffer->reader_page); | 1041 | free_buffer_page(cpu_buffer->reader_page); | 
| 616 | 1042 | ||
| 617 | list_for_each_entry_safe(bpage, tmp, head, list) { | 1043 | rb_head_page_deactivate(cpu_buffer); | 
| 618 | list_del_init(&bpage->list); | 1044 | |
| 1045 | if (head) { | ||
| 1046 | list_for_each_entry_safe(bpage, tmp, head, list) { | ||
| 1047 | list_del_init(&bpage->list); | ||
| 1048 | free_buffer_page(bpage); | ||
| 1049 | } | ||
| 1050 | bpage = list_entry(head, struct buffer_page, list); | ||
| 619 | free_buffer_page(bpage); | 1051 | free_buffer_page(bpage); | 
| 620 | } | 1052 | } | 
| 1053 | |||
| 621 | kfree(cpu_buffer); | 1054 | kfree(cpu_buffer); | 
| 622 | } | 1055 | } | 
| 623 | 1056 | ||
| @@ -735,6 +1168,7 @@ ring_buffer_free(struct ring_buffer *buffer) | |||
| 735 | 1168 | ||
| 736 | put_online_cpus(); | 1169 | put_online_cpus(); | 
| 737 | 1170 | ||
| 1171 | kfree(buffer->buffers); | ||
| 738 | free_cpumask_var(buffer->cpumask); | 1172 | free_cpumask_var(buffer->cpumask); | 
| 739 | 1173 | ||
| 740 | kfree(buffer); | 1174 | kfree(buffer); | 
| @@ -759,15 +1193,17 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | |||
| 759 | atomic_inc(&cpu_buffer->record_disabled); | 1193 | atomic_inc(&cpu_buffer->record_disabled); | 
| 760 | synchronize_sched(); | 1194 | synchronize_sched(); | 
| 761 | 1195 | ||
| 1196 | rb_head_page_deactivate(cpu_buffer); | ||
| 1197 | |||
| 762 | for (i = 0; i < nr_pages; i++) { | 1198 | for (i = 0; i < nr_pages; i++) { | 
| 763 | if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) | 1199 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) | 
| 764 | return; | 1200 | return; | 
| 765 | p = cpu_buffer->pages.next; | 1201 | p = cpu_buffer->pages->next; | 
| 766 | bpage = list_entry(p, struct buffer_page, list); | 1202 | bpage = list_entry(p, struct buffer_page, list); | 
| 767 | list_del_init(&bpage->list); | 1203 | list_del_init(&bpage->list); | 
| 768 | free_buffer_page(bpage); | 1204 | free_buffer_page(bpage); | 
| 769 | } | 1205 | } | 
| 770 | if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) | 1206 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) | 
| 771 | return; | 1207 | return; | 
| 772 | 1208 | ||
| 773 | rb_reset_cpu(cpu_buffer); | 1209 | rb_reset_cpu(cpu_buffer); | 
| @@ -789,15 +1225,19 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 789 | atomic_inc(&cpu_buffer->record_disabled); | 1225 | atomic_inc(&cpu_buffer->record_disabled); | 
| 790 | synchronize_sched(); | 1226 | synchronize_sched(); | 
| 791 | 1227 | ||
| 1228 | spin_lock_irq(&cpu_buffer->reader_lock); | ||
| 1229 | rb_head_page_deactivate(cpu_buffer); | ||
| 1230 | |||
| 792 | for (i = 0; i < nr_pages; i++) { | 1231 | for (i = 0; i < nr_pages; i++) { | 
| 793 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) | 1232 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) | 
| 794 | return; | 1233 | return; | 
| 795 | p = pages->next; | 1234 | p = pages->next; | 
| 796 | bpage = list_entry(p, struct buffer_page, list); | 1235 | bpage = list_entry(p, struct buffer_page, list); | 
| 797 | list_del_init(&bpage->list); | 1236 | list_del_init(&bpage->list); | 
| 798 | list_add_tail(&bpage->list, &cpu_buffer->pages); | 1237 | list_add_tail(&bpage->list, cpu_buffer->pages); | 
| 799 | } | 1238 | } | 
| 800 | rb_reset_cpu(cpu_buffer); | 1239 | rb_reset_cpu(cpu_buffer); | 
| 1240 | spin_unlock_irq(&cpu_buffer->reader_lock); | ||
| 801 | 1241 | ||
| 802 | rb_check_pages(cpu_buffer); | 1242 | rb_check_pages(cpu_buffer); | 
| 803 | 1243 | ||
| @@ -948,21 +1388,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 948 | } | 1388 | } | 
| 949 | 1389 | ||
| 950 | static inline struct ring_buffer_event * | 1390 | static inline struct ring_buffer_event * | 
| 951 | rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) | ||
| 952 | { | ||
| 953 | return __rb_page_index(cpu_buffer->head_page, | ||
| 954 | cpu_buffer->head_page->read); | ||
| 955 | } | ||
| 956 | |||
| 957 | static inline struct ring_buffer_event * | ||
| 958 | rb_iter_head_event(struct ring_buffer_iter *iter) | 1391 | rb_iter_head_event(struct ring_buffer_iter *iter) | 
| 959 | { | 1392 | { | 
| 960 | return __rb_page_index(iter->head_page, iter->head); | 1393 | return __rb_page_index(iter->head_page, iter->head); | 
| 961 | } | 1394 | } | 
| 962 | 1395 | ||
| 963 | static inline unsigned rb_page_write(struct buffer_page *bpage) | 1396 | static inline unsigned long rb_page_write(struct buffer_page *bpage) | 
| 964 | { | 1397 | { | 
| 965 | return local_read(&bpage->write); | 1398 | return local_read(&bpage->write) & RB_WRITE_MASK; | 
| 966 | } | 1399 | } | 
| 967 | 1400 | ||
| 968 | static inline unsigned rb_page_commit(struct buffer_page *bpage) | 1401 | static inline unsigned rb_page_commit(struct buffer_page *bpage) | 
| @@ -970,6 +1403,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage) | |||
| 970 | return local_read(&bpage->page->commit); | 1403 | return local_read(&bpage->page->commit); | 
| 971 | } | 1404 | } | 
| 972 | 1405 | ||
| 1406 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) | ||
| 1407 | { | ||
| 1408 | return local_read(&bpage->entries) & RB_WRITE_MASK; | ||
| 1409 | } | ||
| 1410 | |||
| 973 | /* Size is determined by what has been commited */ | 1411 | /* Size is determined by what has been commited */ | 
| 974 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1412 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 
| 975 | { | 1413 | { | 
| @@ -982,22 +1420,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 982 | return rb_page_commit(cpu_buffer->commit_page); | 1420 | return rb_page_commit(cpu_buffer->commit_page); | 
| 983 | } | 1421 | } | 
| 984 | 1422 | ||
| 985 | static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) | ||
| 986 | { | ||
| 987 | return rb_page_commit(cpu_buffer->head_page); | ||
| 988 | } | ||
| 989 | |||
| 990 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 991 | struct buffer_page **bpage) | ||
| 992 | { | ||
| 993 | struct list_head *p = (*bpage)->list.next; | ||
| 994 | |||
| 995 | if (p == &cpu_buffer->pages) | ||
| 996 | p = p->next; | ||
| 997 | |||
| 998 | *bpage = list_entry(p, struct buffer_page, list); | ||
| 999 | } | ||
| 1000 | |||
| 1001 | static inline unsigned | 1423 | static inline unsigned | 
| 1002 | rb_event_index(struct ring_buffer_event *event) | 1424 | rb_event_index(struct ring_buffer_event *event) | 
| 1003 | { | 1425 | { | 
| @@ -1023,6 +1445,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1023 | static void | 1445 | static void | 
| 1024 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1446 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 
| 1025 | { | 1447 | { | 
| 1448 | unsigned long max_count; | ||
| 1449 | |||
| 1026 | /* | 1450 | /* | 
| 1027 | * We only race with interrupts and NMIs on this CPU. | 1451 | * We only race with interrupts and NMIs on this CPU. | 
| 1028 | * If we own the commit event, then we can commit | 1452 | * If we own the commit event, then we can commit | 
| @@ -1032,9 +1456,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1032 | * assign the commit to the tail. | 1456 | * assign the commit to the tail. | 
| 1033 | */ | 1457 | */ | 
| 1034 | again: | 1458 | again: | 
| 1459 | max_count = cpu_buffer->buffer->pages * 100; | ||
| 1460 | |||
| 1035 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { | 1461 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { | 
| 1036 | cpu_buffer->commit_page->page->commit = | 1462 | if (RB_WARN_ON(cpu_buffer, !(--max_count))) | 
| 1037 | cpu_buffer->commit_page->write; | 1463 | return; | 
| 1464 | if (RB_WARN_ON(cpu_buffer, | ||
| 1465 | rb_is_reader_page(cpu_buffer->tail_page))) | ||
| 1466 | return; | ||
| 1467 | local_set(&cpu_buffer->commit_page->page->commit, | ||
| 1468 | rb_page_write(cpu_buffer->commit_page)); | ||
| 1038 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 1469 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 
| 1039 | cpu_buffer->write_stamp = | 1470 | cpu_buffer->write_stamp = | 
| 1040 | cpu_buffer->commit_page->page->time_stamp; | 1471 | cpu_buffer->commit_page->page->time_stamp; | 
| @@ -1043,8 +1474,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1043 | } | 1474 | } | 
| 1044 | while (rb_commit_index(cpu_buffer) != | 1475 | while (rb_commit_index(cpu_buffer) != | 
| 1045 | rb_page_write(cpu_buffer->commit_page)) { | 1476 | rb_page_write(cpu_buffer->commit_page)) { | 
| 1046 | cpu_buffer->commit_page->page->commit = | 1477 | |
| 1047 | cpu_buffer->commit_page->write; | 1478 | local_set(&cpu_buffer->commit_page->page->commit, | 
| 1479 | rb_page_write(cpu_buffer->commit_page)); | ||
| 1480 | RB_WARN_ON(cpu_buffer, | ||
| 1481 | local_read(&cpu_buffer->commit_page->page->commit) & | ||
| 1482 | ~RB_WRITE_MASK); | ||
| 1048 | barrier(); | 1483 | barrier(); | 
| 1049 | } | 1484 | } | 
| 1050 | 1485 | ||
| @@ -1077,7 +1512,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
| 1077 | * to the head page instead of next. | 1512 | * to the head page instead of next. | 
| 1078 | */ | 1513 | */ | 
| 1079 | if (iter->head_page == cpu_buffer->reader_page) | 1514 | if (iter->head_page == cpu_buffer->reader_page) | 
| 1080 | iter->head_page = cpu_buffer->head_page; | 1515 | iter->head_page = rb_set_head_page(cpu_buffer); | 
| 1081 | else | 1516 | else | 
| 1082 | rb_inc_page(cpu_buffer, &iter->head_page); | 1517 | rb_inc_page(cpu_buffer, &iter->head_page); | 
| 1083 | 1518 | ||
| @@ -1121,6 +1556,163 @@ rb_update_event(struct ring_buffer_event *event, | |||
| 1121 | } | 1556 | } | 
| 1122 | } | 1557 | } | 
| 1123 | 1558 | ||
| 1559 | /* | ||
| 1560 | * rb_handle_head_page - writer hit the head page | ||
| 1561 | * | ||
| 1562 | * Returns: +1 to retry page | ||
| 1563 | * 0 to continue | ||
| 1564 | * -1 on error | ||
| 1565 | */ | ||
| 1566 | static int | ||
| 1567 | rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 1568 | struct buffer_page *tail_page, | ||
| 1569 | struct buffer_page *next_page) | ||
| 1570 | { | ||
| 1571 | struct buffer_page *new_head; | ||
| 1572 | int entries; | ||
| 1573 | int type; | ||
| 1574 | int ret; | ||
| 1575 | |||
| 1576 | entries = rb_page_entries(next_page); | ||
| 1577 | |||
| 1578 | /* | ||
| 1579 | * The hard part is here. We need to move the head | ||
| 1580 | * forward, and protect against both readers on | ||
| 1581 | * other CPUs and writers coming in via interrupts. | ||
| 1582 | */ | ||
| 1583 | type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, | ||
| 1584 | RB_PAGE_HEAD); | ||
| 1585 | |||
| 1586 | /* | ||
| 1587 | * type can be one of four: | ||
| 1588 | * NORMAL - an interrupt already moved it for us | ||
| 1589 | * HEAD - we are the first to get here. | ||
| 1590 | * UPDATE - we are the interrupt interrupting | ||
| 1591 | * a current move. | ||
| 1592 | * MOVED - a reader on another CPU moved the next | ||
| 1593 | * pointer to its reader page. Give up | ||
| 1594 | * and try again. | ||
| 1595 | */ | ||
| 1596 | |||
| 1597 | switch (type) { | ||
| 1598 | case RB_PAGE_HEAD: | ||
| 1599 | /* | ||
| 1600 | * We changed the head to UPDATE, thus | ||
| 1601 | * it is our responsibility to update | ||
| 1602 | * the counters. | ||
| 1603 | */ | ||
| 1604 | local_add(entries, &cpu_buffer->overrun); | ||
| 1605 | |||
| 1606 | /* | ||
| 1607 | * The entries will be zeroed out when we move the | ||
| 1608 | * tail page. | ||
| 1609 | */ | ||
| 1610 | |||
| 1611 | /* still more to do */ | ||
| 1612 | break; | ||
| 1613 | |||
| 1614 | case RB_PAGE_UPDATE: | ||
| 1615 | /* | ||
| 1616 | * This is an interrupt that interrupt the | ||
| 1617 | * previous update. Still more to do. | ||
| 1618 | */ | ||
| 1619 | break; | ||
| 1620 | case RB_PAGE_NORMAL: | ||
| 1621 | /* | ||
| 1622 | * An interrupt came in before the update | ||
| 1623 | * and processed this for us. | ||
| 1624 | * Nothing left to do. | ||
| 1625 | */ | ||
| 1626 | return 1; | ||
| 1627 | case RB_PAGE_MOVED: | ||
| 1628 | /* | ||
| 1629 | * The reader is on another CPU and just did | ||
| 1630 | * a swap with our next_page. | ||
| 1631 | * Try again. | ||
| 1632 | */ | ||
| 1633 | return 1; | ||
| 1634 | default: | ||
| 1635 | RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ | ||
| 1636 | return -1; | ||
| 1637 | } | ||
| 1638 | |||
| 1639 | /* | ||
| 1640 | * Now that we are here, the old head pointer is | ||
| 1641 | * set to UPDATE. This will keep the reader from | ||
| 1642 | * swapping the head page with the reader page. | ||
| 1643 | * The reader (on another CPU) will spin till | ||
| 1644 | * we are finished. | ||
| 1645 | * | ||
| 1646 | * We just need to protect against interrupts | ||
| 1647 | * doing the job. We will set the next pointer | ||
| 1648 | * to HEAD. After that, we set the old pointer | ||
| 1649 | * to NORMAL, but only if it was HEAD before. | ||
| 1650 | * otherwise we are an interrupt, and only | ||
| 1651 | * want the outer most commit to reset it. | ||
| 1652 | */ | ||
| 1653 | new_head = next_page; | ||
| 1654 | rb_inc_page(cpu_buffer, &new_head); | ||
| 1655 | |||
| 1656 | ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, | ||
| 1657 | RB_PAGE_NORMAL); | ||
| 1658 | |||
| 1659 | /* | ||
| 1660 | * Valid returns are: | ||
| 1661 | * HEAD - an interrupt came in and already set it. | ||
| 1662 | * NORMAL - One of two things: | ||
| 1663 | * 1) We really set it. | ||
| 1664 | * 2) A bunch of interrupts came in and moved | ||
| 1665 | * the page forward again. | ||
| 1666 | */ | ||
| 1667 | switch (ret) { | ||
| 1668 | case RB_PAGE_HEAD: | ||
| 1669 | case RB_PAGE_NORMAL: | ||
| 1670 | /* OK */ | ||
| 1671 | break; | ||
| 1672 | default: | ||
| 1673 | RB_WARN_ON(cpu_buffer, 1); | ||
| 1674 | return -1; | ||
| 1675 | } | ||
| 1676 | |||
| 1677 | /* | ||
| 1678 | * It is possible that an interrupt came in, | ||
| 1679 | * set the head up, then more interrupts came in | ||
| 1680 | * and moved it again. When we get back here, | ||
| 1681 | * the page would have been set to NORMAL but we | ||
| 1682 | * just set it back to HEAD. | ||
| 1683 | * | ||
| 1684 | * How do you detect this? Well, if that happened | ||
| 1685 | * the tail page would have moved. | ||
| 1686 | */ | ||
| 1687 | if (ret == RB_PAGE_NORMAL) { | ||
| 1688 | /* | ||
| 1689 | * If the tail had moved passed next, then we need | ||
| 1690 | * to reset the pointer. | ||
| 1691 | */ | ||
| 1692 | if (cpu_buffer->tail_page != tail_page && | ||
| 1693 | cpu_buffer->tail_page != next_page) | ||
| 1694 | rb_head_page_set_normal(cpu_buffer, new_head, | ||
| 1695 | next_page, | ||
| 1696 | RB_PAGE_HEAD); | ||
| 1697 | } | ||
| 1698 | |||
| 1699 | /* | ||
| 1700 | * If this was the outer most commit (the one that | ||
| 1701 | * changed the original pointer from HEAD to UPDATE), | ||
| 1702 | * then it is up to us to reset it to NORMAL. | ||
| 1703 | */ | ||
| 1704 | if (type == RB_PAGE_HEAD) { | ||
| 1705 | ret = rb_head_page_set_normal(cpu_buffer, next_page, | ||
| 1706 | tail_page, | ||
| 1707 | RB_PAGE_UPDATE); | ||
| 1708 | if (RB_WARN_ON(cpu_buffer, | ||
| 1709 | ret != RB_PAGE_UPDATE)) | ||
| 1710 | return -1; | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | return 0; | ||
| 1714 | } | ||
| 1715 | |||
| 1124 | static unsigned rb_calculate_event_length(unsigned length) | 1716 | static unsigned rb_calculate_event_length(unsigned length) | 
| 1125 | { | 1717 | { | 
| 1126 | struct ring_buffer_event event; /* Used only for sizeof array */ | 1718 | struct ring_buffer_event event; /* Used only for sizeof array */ | 
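Stripped of the interrupt and reader races it defends against, rb_handle_head_page() walks the old head pointer's flags through a fixed sequence: claim it (HEAD to UPDATE), mark the following page as the new head, then release the old pointer (UPDATE to NORMAL), with the release performed only by the outermost writer. A toy, single-threaded walk-through of that sequence (the real code does each step with cmpxchg, as sketched earlier):

#include <assert.h>

#define RB_PAGE_NORMAL 0UL
#define RB_PAGE_HEAD   1UL
#define RB_PAGE_UPDATE 2UL
#define RB_FLAG_MASK   3UL

int main(void)
{
        /* pretend this word is head->list.prev->next: pointer bits plus flag */
        unsigned long slot = 0x1000UL | RB_PAGE_HEAD;

        /* 1) the writer claims the head page: HEAD -> UPDATE (readers now wait) */
        assert((slot & RB_FLAG_MASK) == RB_PAGE_HEAD);
        slot = (slot & ~RB_FLAG_MASK) | RB_PAGE_UPDATE;

        /* 2) elsewhere, the page after the old head is flagged as the new HEAD */

        /* 3) the outermost writer releases the old pointer: UPDATE -> NORMAL */
        assert((slot & RB_FLAG_MASK) == RB_PAGE_UPDATE);
        slot = (slot & ~RB_FLAG_MASK) | RB_PAGE_NORMAL;

        return 0;
}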
| @@ -1184,9 +1776,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1184 | event->type_len = RINGBUF_TYPE_PADDING; | 1776 | event->type_len = RINGBUF_TYPE_PADDING; | 
| 1185 | /* time delta must be non zero */ | 1777 | /* time delta must be non zero */ | 
| 1186 | event->time_delta = 1; | 1778 | event->time_delta = 1; | 
| 1187 | /* Account for this as an entry */ | ||
| 1188 | local_inc(&tail_page->entries); | ||
| 1189 | local_inc(&cpu_buffer->entries); | ||
| 1190 | 1779 | ||
| 1191 | /* Set write to end of buffer */ | 1780 | /* Set write to end of buffer */ | 
| 1192 | length = (tail + length) - BUF_PAGE_SIZE; | 1781 | length = (tail + length) - BUF_PAGE_SIZE; | 
| @@ -1199,96 +1788,93 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1199 | struct buffer_page *commit_page, | 1788 | struct buffer_page *commit_page, | 
| 1200 | struct buffer_page *tail_page, u64 *ts) | 1789 | struct buffer_page *tail_page, u64 *ts) | 
| 1201 | { | 1790 | { | 
| 1202 | struct buffer_page *next_page, *head_page, *reader_page; | ||
| 1203 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1791 | struct ring_buffer *buffer = cpu_buffer->buffer; | 
| 1204 | bool lock_taken = false; | 1792 | struct buffer_page *next_page; | 
| 1205 | unsigned long flags; | 1793 | int ret; | 
| 1206 | 1794 | ||
| 1207 | next_page = tail_page; | 1795 | next_page = tail_page; | 
| 1208 | 1796 | ||
| 1209 | local_irq_save(flags); | ||
| 1210 | /* | ||
| 1211 | * Since the write to the buffer is still not | ||
| 1212 | * fully lockless, we must be careful with NMIs. | ||
| 1213 | * The locks in the writers are taken when a write | ||
| 1214 | * crosses to a new page. The locks protect against | ||
| 1215 | * races with the readers (this will soon be fixed | ||
| 1216 | * with a lockless solution). | ||
| 1217 | * | ||
| 1218 | * Because we can not protect against NMIs, and we | ||
| 1219 | * want to keep traces reentrant, we need to manage | ||
| 1220 | * what happens when we are in an NMI. | ||
| 1221 | * | ||
| 1222 | * NMIs can happen after we take the lock. | ||
| 1223 | * If we are in an NMI, only take the lock | ||
| 1224 | * if it is not already taken. Otherwise | ||
| 1225 | * simply fail. | ||
| 1226 | */ | ||
| 1227 | if (unlikely(in_nmi())) { | ||
| 1228 | if (!__raw_spin_trylock(&cpu_buffer->lock)) { | ||
| 1229 | cpu_buffer->nmi_dropped++; | ||
| 1230 | goto out_reset; | ||
| 1231 | } | ||
| 1232 | } else | ||
| 1233 | __raw_spin_lock(&cpu_buffer->lock); | ||
| 1234 | |||
| 1235 | lock_taken = true; | ||
| 1236 | |||
| 1237 | rb_inc_page(cpu_buffer, &next_page); | 1797 | rb_inc_page(cpu_buffer, &next_page); | 
| 1238 | 1798 | ||
| 1239 | head_page = cpu_buffer->head_page; | ||
| 1240 | reader_page = cpu_buffer->reader_page; | ||
| 1241 | |||
| 1242 | /* we grabbed the lock before incrementing */ | ||
| 1243 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | ||
| 1244 | goto out_reset; | ||
| 1245 | |||
| 1246 | /* | 1799 | /* | 
| 1247 | * If for some reason, we had an interrupt storm that made | 1800 | * If for some reason, we had an interrupt storm that made | 
| 1248 | * it all the way around the buffer, bail, and warn | 1801 | * it all the way around the buffer, bail, and warn | 
| 1249 | * about it. | 1802 | * about it. | 
| 1250 | */ | 1803 | */ | 
| 1251 | if (unlikely(next_page == commit_page)) { | 1804 | if (unlikely(next_page == commit_page)) { | 
| 1252 | cpu_buffer->commit_overrun++; | 1805 | local_inc(&cpu_buffer->commit_overrun); | 
| 1253 | goto out_reset; | 1806 | goto out_reset; | 
| 1254 | } | 1807 | } | 
| 1255 | 1808 | ||
| 1256 | if (next_page == head_page) { | 1809 | /* | 
| 1257 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 1810 | * This is where the fun begins! | 
| 1258 | goto out_reset; | 1811 | * | 
| 1259 | 1812 | * We are fighting against races between a reader that | |
| 1260 | /* tail_page has not moved yet? */ | 1813 | * could be on another CPU trying to swap its reader | 
| 1261 | if (tail_page == cpu_buffer->tail_page) { | 1814 | * page with the buffer head. | 
| 1262 | /* count overflows */ | 1815 | * | 
| 1263 | cpu_buffer->overrun += | 1816 | * We are also fighting against interrupts coming in and | 
| 1264 | local_read(&head_page->entries); | 1817 | * moving the head or tail on us as well. | 
| 1818 | * | ||
| 1819 | * If the next page is the head page then we have filled | ||
| 1820 | * the buffer, unless the commit page is still on the | ||
| 1821 | * reader page. | ||
| 1822 | */ | ||
| 1823 | if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { | ||
| 1265 | 1824 | ||
| 1266 | rb_inc_page(cpu_buffer, &head_page); | 1825 | /* | 
| 1267 | cpu_buffer->head_page = head_page; | 1826 | * If the commit is not on the reader page, then | 
| 1268 | cpu_buffer->head_page->read = 0; | 1827 | * move the header page. | 
| 1828 | */ | ||
| 1829 | if (!rb_is_reader_page(cpu_buffer->commit_page)) { | ||
| 1830 | /* | ||
| 1831 | * If we are not in overwrite mode, | ||
| 1832 | * this is easy, just stop here. | ||
| 1833 | */ | ||
| 1834 | if (!(buffer->flags & RB_FL_OVERWRITE)) | ||
| 1835 | goto out_reset; | ||
| 1836 | |||
| 1837 | ret = rb_handle_head_page(cpu_buffer, | ||
| 1838 | tail_page, | ||
| 1839 | next_page); | ||
| 1840 | if (ret < 0) | ||
| 1841 | goto out_reset; | ||
| 1842 | if (ret) | ||
| 1843 | goto out_again; | ||
| 1844 | } else { | ||
| 1845 | /* | ||
| 1846 | * We need to be careful here too. The | ||
| 1847 | * commit page could still be on the reader | ||
| 1848 | * page. We could have a small buffer, and | ||
| 1849 | * have filled up the buffer with events | ||
| 1850 | * from interrupts and such, and wrapped. | ||
| 1851 | * | ||
| 1852 | * Note, if the tail page is also the on the | ||
| 1853 | * reader_page, we let it move out. | ||
| 1854 | */ | ||
| 1855 | if (unlikely((cpu_buffer->commit_page != | ||
| 1856 | cpu_buffer->tail_page) && | ||
| 1857 | (cpu_buffer->commit_page == | ||
| 1858 | cpu_buffer->reader_page))) { | ||
| 1859 | local_inc(&cpu_buffer->commit_overrun); | ||
| 1860 | goto out_reset; | ||
| 1861 | } | ||
| 1269 | } | 1862 | } | 
| 1270 | } | 1863 | } | 
| 1271 | 1864 | ||
| 1272 | /* | 1865 | ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); | 
| 1273 | * If the tail page is still the same as what we think | 1866 | if (ret) { | 
| 1274 | * it is, then it is up to us to update the tail | 1867 | /* | 
| 1275 | * pointer. | 1868 | * Nested commits always have zero deltas, so | 
| 1276 | */ | 1869 | * just reread the time stamp | 
| 1277 | if (tail_page == cpu_buffer->tail_page) { | 1870 | */ | 
| 1278 | local_set(&next_page->write, 0); | 1871 | *ts = rb_time_stamp(buffer); | 
| 1279 | local_set(&next_page->entries, 0); | 1872 | next_page->page->time_stamp = *ts; | 
| 1280 | local_set(&next_page->page->commit, 0); | ||
| 1281 | cpu_buffer->tail_page = next_page; | ||
| 1282 | |||
| 1283 | /* reread the time stamp */ | ||
| 1284 | *ts = rb_time_stamp(buffer, cpu_buffer->cpu); | ||
| 1285 | cpu_buffer->tail_page->page->time_stamp = *ts; | ||
| 1286 | } | 1873 | } | 
| 1287 | 1874 | ||
| 1288 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 1875 | out_again: | 
| 1289 | 1876 | ||
| 1290 | __raw_spin_unlock(&cpu_buffer->lock); | 1877 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 
| 1291 | local_irq_restore(flags); | ||
| 1292 | 1878 | ||
| 1293 | /* fail and let the caller try again */ | 1879 | /* fail and let the caller try again */ | 
| 1294 | return ERR_PTR(-EAGAIN); | 1880 | return ERR_PTR(-EAGAIN); | 
| @@ -1297,9 +1883,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1297 | /* reset write */ | 1883 | /* reset write */ | 
| 1298 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 1884 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 
| 1299 | 1885 | ||
| 1300 | if (likely(lock_taken)) | ||
| 1301 | __raw_spin_unlock(&cpu_buffer->lock); | ||
| 1302 | local_irq_restore(flags); | ||
| 1303 | return NULL; | 1886 | return NULL; | 
| 1304 | } | 1887 | } | 
| 1305 | 1888 | ||
| @@ -1316,6 +1899,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1316 | barrier(); | 1899 | barrier(); | 
| 1317 | tail_page = cpu_buffer->tail_page; | 1900 | tail_page = cpu_buffer->tail_page; | 
| 1318 | write = local_add_return(length, &tail_page->write); | 1901 | write = local_add_return(length, &tail_page->write); | 
| 1902 | |||
| 1903 | /* set write to only the index of the write */ | ||
| 1904 | write &= RB_WRITE_MASK; | ||
| 1319 | tail = write - length; | 1905 | tail = write - length; | 
| 1320 | 1906 | ||
| 1321 | /* See if we shot pass the end of this buffer page */ | 1907 | /* See if we shot pass the end of this buffer page */ | 
| @@ -1360,12 +1946,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1360 | bpage = cpu_buffer->tail_page; | 1946 | bpage = cpu_buffer->tail_page; | 
| 1361 | 1947 | ||
| 1362 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | 1948 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | 
| 1949 | unsigned long write_mask = | ||
| 1950 | local_read(&bpage->write) & ~RB_WRITE_MASK; | ||
| 1363 | /* | 1951 | /* | 
| 1364 | * This is on the tail page. It is possible that | 1952 | * This is on the tail page. It is possible that | 
| 1365 | * a write could come in and move the tail page | 1953 | * a write could come in and move the tail page | 
| 1366 | * and write to the next page. That is fine | 1954 | * and write to the next page. That is fine | 
| 1367 | * because we just shorten what is on this page. | 1955 | * because we just shorten what is on this page. | 
| 1368 | */ | 1956 | */ | 
| 1957 | old_index += write_mask; | ||
| 1958 | new_index += write_mask; | ||
| 1369 | index = local_cmpxchg(&bpage->write, old_index, new_index); | 1959 | index = local_cmpxchg(&bpage->write, old_index, new_index); | 
| 1370 | if (index == old_index) | 1960 | if (index == old_index) | 
| 1371 | return 1; | 1961 | return 1; | 
| @@ -1481,7 +2071,8 @@ static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1481 | } | 2071 | } | 
| 1482 | 2072 | ||
| 1483 | static struct ring_buffer_event * | 2073 | static struct ring_buffer_event * | 
| 1484 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 2074 | rb_reserve_next_event(struct ring_buffer *buffer, | 
| 2075 | struct ring_buffer_per_cpu *cpu_buffer, | ||
| 1485 | unsigned long length) | 2076 | unsigned long length) | 
| 1486 | { | 2077 | { | 
| 1487 | struct ring_buffer_event *event; | 2078 | struct ring_buffer_event *event; | 
| @@ -1491,6 +2082,21 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1491 | 2082 | ||
| 1492 | rb_start_commit(cpu_buffer); | 2083 | rb_start_commit(cpu_buffer); | 
| 1493 | 2084 | ||
| 2085 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
| 2086 | /* | ||
| 2087 | * Due to the ability to swap a cpu buffer from a buffer | ||
| 2088 | * it is possible it was swapped before we committed. | ||
| 2089 | * (committing stops a swap). We check for it here and | ||
| 2090 | * if it happened, we have to fail the write. | ||
| 2091 | */ | ||
| 2092 | barrier(); | ||
| 2093 | if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { | ||
| 2094 | local_dec(&cpu_buffer->committing); | ||
| 2095 | local_dec(&cpu_buffer->commits); | ||
| 2096 | return NULL; | ||
| 2097 | } | ||
| 2098 | #endif | ||
| 2099 | |||
| 1494 | length = rb_calculate_event_length(length); | 2100 | length = rb_calculate_event_length(length); | 
| 1495 | again: | 2101 | again: | 
| 1496 | /* | 2102 | /* | 
| @@ -1505,7 +2111,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1505 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 2111 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 
| 1506 | goto out_fail; | 2112 | goto out_fail; | 
| 1507 | 2113 | ||
| 1508 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 2114 | ts = rb_time_stamp(cpu_buffer->buffer); | 
| 1509 | 2115 | ||
| 1510 | /* | 2116 | /* | 
| 1511 | * Only the first commit can update the timestamp. | 2117 | * Only the first commit can update the timestamp. | 
| @@ -1563,6 +2169,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1563 | return NULL; | 2169 | return NULL; | 
| 1564 | } | 2170 | } | 
| 1565 | 2171 | ||
| 2172 | #ifdef CONFIG_TRACING | ||
| 2173 | |||
| 1566 | #define TRACE_RECURSIVE_DEPTH 16 | 2174 | #define TRACE_RECURSIVE_DEPTH 16 | 
| 1567 | 2175 | ||
| 1568 | static int trace_recursive_lock(void) | 2176 | static int trace_recursive_lock(void) | 
| @@ -1593,6 +2201,13 @@ static void trace_recursive_unlock(void) | |||
| 1593 | current->trace_recursion--; | 2201 | current->trace_recursion--; | 
| 1594 | } | 2202 | } | 
| 1595 | 2203 | ||
| 2204 | #else | ||
| 2205 | |||
| 2206 | #define trace_recursive_lock() (0) | ||
| 2207 | #define trace_recursive_unlock() do { } while (0) | ||
| 2208 | |||
| 2209 | #endif | ||
| 2210 | |||
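With CONFIG_TRACING off, the lock/unlock pair above compiles away; with it on, the guard is a bounded per-task depth counter. A stand-alone model of the guard (a thread-local int stands in for current->trace_recursion):

    #define TRACE_RECURSIVE_DEPTH 16

    static _Thread_local int trace_recursion;   /* models current->trace_recursion */

    /* Returns 0 when tracing may proceed, -1 when nesting is too deep
     * (e.g. a tracer recursing into itself from an interrupt). */
    int trace_recursive_lock(void)
    {
            if (++trace_recursion > TRACE_RECURSIVE_DEPTH) {
                    --trace_recursion;
                    return -1;
            }
            return 0;
    }

    void trace_recursive_unlock(void)
    {
            --trace_recursion;
    }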
| 1596 | static DEFINE_PER_CPU(int, rb_need_resched); | 2211 | static DEFINE_PER_CPU(int, rb_need_resched); | 
| 1597 | 2212 | ||
| 1598 | /** | 2213 | /** | 
| @@ -1642,7 +2257,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
| 1642 | if (length > BUF_MAX_DATA_SIZE) | 2257 | if (length > BUF_MAX_DATA_SIZE) | 
| 1643 | goto out; | 2258 | goto out; | 
| 1644 | 2259 | ||
| 1645 | event = rb_reserve_next_event(cpu_buffer, length); | 2260 | event = rb_reserve_next_event(buffer, cpu_buffer, length); | 
| 1646 | if (!event) | 2261 | if (!event) | 
| 1647 | goto out; | 2262 | goto out; | 
| 1648 | 2263 | ||
| @@ -1665,18 +2280,23 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
| 1665 | } | 2280 | } | 
| 1666 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | 2281 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | 
| 1667 | 2282 | ||
| 1668 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2283 | static void | 
| 2284 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 1669 | struct ring_buffer_event *event) | 2285 | struct ring_buffer_event *event) | 
| 1670 | { | 2286 | { | 
| 1671 | local_inc(&cpu_buffer->entries); | ||
| 1672 | |||
| 1673 | /* | 2287 | /* | 
| 1674 | * The event first in the commit queue updates the | 2288 | * The event first in the commit queue updates the | 
| 1675 | * time stamp. | 2289 | * time stamp. | 
| 1676 | */ | 2290 | */ | 
| 1677 | if (rb_event_is_commit(cpu_buffer, event)) | 2291 | if (rb_event_is_commit(cpu_buffer, event)) | 
| 1678 | cpu_buffer->write_stamp += event->time_delta; | 2292 | cpu_buffer->write_stamp += event->time_delta; | 
| 2293 | } | ||
| 1679 | 2294 | ||
| 2295 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 2296 | struct ring_buffer_event *event) | ||
| 2297 | { | ||
| 2298 | local_inc(&cpu_buffer->entries); | ||
| 2299 | rb_update_write_stamp(cpu_buffer, event); | ||
| 1680 | rb_end_commit(cpu_buffer); | 2300 | rb_end_commit(cpu_buffer); | 
| 1681 | } | 2301 | } | 
| 1682 | 2302 | ||
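The split of rb_update_write_stamp out of rb_commit does not change the write-side API seen by callers: reserve, fill in the payload, commit. A hypothetical caller (not part of this patch) would look roughly like:

    #include <linux/ring_buffer.h>
    #include <linux/types.h>
    #include <linux/errno.h>

    /* Illustrative only: write one u32 sample into an existing ring buffer. */
    static int write_sample(struct ring_buffer *buffer, u32 value)
    {
            struct ring_buffer_event *event;
            u32 *payload;

            event = ring_buffer_lock_reserve(buffer, sizeof(*payload));
            if (!event)
                    return -EBUSY;          /* disabled, full, or swapped away */

            payload = ring_buffer_event_data(event);
            *payload = value;

            return ring_buffer_unlock_commit(buffer, event);
    }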
| @@ -1723,32 +2343,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event) | |||
| 1723 | event->time_delta = 1; | 2343 | event->time_delta = 1; | 
| 1724 | } | 2344 | } | 
| 1725 | 2345 | ||
| 1726 | /** | 2346 | /* | 
| 1727 | * ring_buffer_event_discard - discard any event in the ring buffer | 2347 | * Decrement the entry count on the page that an event is on. | 
| 1728 | * @event: the event to discard | 2348 | * The event does not even need to exist, only the pointer | 
| 1729 | * | 2349 | * to the page it is on. This may only be called before the commit | 
| 1730 | * Sometimes a event that is in the ring buffer needs to be ignored. | 2350 | * takes place. | 
| 1731 | * This function lets the user discard an event in the ring buffer | ||
| 1732 | * and then that event will not be read later. | ||
| 1733 | * | ||
| 1734 | * Note, it is up to the user to be careful with this, and protect | ||
| 1735 | * against races. If the user discards an event that has been consumed | ||
| 1736 | * it is possible that it could corrupt the ring buffer. | ||
| 1737 | */ | 2351 | */ | 
| 1738 | void ring_buffer_event_discard(struct ring_buffer_event *event) | 2352 | static inline void | 
| 2353 | rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 2354 | struct ring_buffer_event *event) | ||
| 1739 | { | 2355 | { | 
| 1740 | rb_event_discard(event); | 2356 | unsigned long addr = (unsigned long)event; | 
| 2357 | struct buffer_page *bpage = cpu_buffer->commit_page; | ||
| 2358 | struct buffer_page *start; | ||
| 2359 | |||
| 2360 | addr &= PAGE_MASK; | ||
| 2361 | |||
| 2362 | /* Do the likely case first */ | ||
| 2363 | if (likely(bpage->page == (void *)addr)) { | ||
| 2364 | local_dec(&bpage->entries); | ||
| 2365 | return; | ||
| 2366 | } | ||
| 2367 | |||
| 2368 | /* | ||
| 2369 | * Because the commit page may be on the reader page, we | ||
| 2370 | * start with the next page and check for the end of the loop there. | ||
| 2371 | */ | ||
| 2372 | rb_inc_page(cpu_buffer, &bpage); | ||
| 2373 | start = bpage; | ||
| 2374 | do { | ||
| 2375 | if (bpage->page == (void *)addr) { | ||
| 2376 | local_dec(&bpage->entries); | ||
| 2377 | return; | ||
| 2378 | } | ||
| 2379 | rb_inc_page(cpu_buffer, &bpage); | ||
| 2380 | } while (bpage != start); | ||
| 2381 | |||
| 2382 | /* commit not part of this buffer?? */ | ||
| 2383 | RB_WARN_ON(cpu_buffer, 1); | ||
| 1741 | } | 2384 | } | 
| 1742 | EXPORT_SYMBOL_GPL(ring_buffer_event_discard); | ||
| 1743 | 2385 | ||
| 1744 | /** | 2386 | /** | 
| 1745 | * ring_buffer_commit_discard - discard an event that has not been committed | 2387 | * ring_buffer_commit_discard - discard an event that has not been committed | 
| 1746 | * @buffer: the ring buffer | 2388 | * @buffer: the ring buffer | 
| 1747 | * @event: non committed event to discard | 2389 | * @event: non committed event to discard | 
| 1748 | * | 2390 | * | 
| 1749 | * This is similar to ring_buffer_event_discard but must only be | 2391 | * Sometimes an event that is in the ring buffer needs to be ignored. | 
| 1750 | * performed on an event that has not been committed yet. The difference | 2392 | * This function lets the user discard an event in the ring buffer | 
| 1751 | * is that this will also try to free the event from the ring buffer | 2393 | * and then that event will not be read later. | 
| 2394 | * | ||
| 2395 | * This function only works if it is called before the item has been | ||
| 2396 | * committed. It will try to free the event from the ring buffer | ||
| 1752 | * if another event has not been added behind it. | 2397 | * if another event has not been added behind it. | 
| 1753 | * | 2398 | * | 
| 1754 | * If another event has been added behind it, it will set the event | 2399 | * If another event has been added behind it, it will set the event | 
| @@ -1776,14 +2421,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
| 1776 | */ | 2421 | */ | 
| 1777 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); | 2422 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); | 
| 1778 | 2423 | ||
| 1779 | if (!rb_try_to_discard(cpu_buffer, event)) | 2424 | rb_decrement_entry(cpu_buffer, event); | 
| 2425 | if (rb_try_to_discard(cpu_buffer, event)) | ||
| 1780 | goto out; | 2426 | goto out; | 
| 1781 | 2427 | ||
| 1782 | /* | 2428 | /* | 
| 1783 | * The commit is still visible to the reader, so we | 2429 | * The commit is still visible to the reader, so we | 
| 1784 | * must increment entries. | 2430 | * must still update the timestamp. | 
| 1785 | */ | 2431 | */ | 
| 1786 | local_inc(&cpu_buffer->entries); | 2432 | rb_update_write_stamp(cpu_buffer, event); | 
| 1787 | out: | 2433 | out: | 
| 1788 | rb_end_commit(cpu_buffer); | 2434 | rb_end_commit(cpu_buffer); | 
| 1789 | 2435 | ||
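Putting the pieces above together: a caller that reserves an event and then decides against it discards it before committing; rb_decrement_entry undoes the entry accounting, rb_try_to_discard attempts to reclaim the slot, and the write stamp is still advanced if the slot could not be reclaimed. A hypothetical sketch of such a caller (names are illustrative, not from this patch):

    #include <linux/ring_buffer.h>
    #include <linux/types.h>

    /* Illustrative only: reserve an event, then drop it if a late filter
     * decides the sample is not interesting. */
    static void maybe_log(struct ring_buffer *buffer, u32 value, bool wanted)
    {
            struct ring_buffer_event *event;
            u32 *payload;

            event = ring_buffer_lock_reserve(buffer, sizeof(*payload));
            if (!event)
                    return;

            payload = ring_buffer_event_data(event);
            *payload = value;

            if (!wanted) {
                    /* must run before the commit: frees the slot if possible,
                     * otherwise turns it into padding */
                    ring_buffer_discard_commit(buffer, event);
                    return;
            }

            ring_buffer_unlock_commit(buffer, event);
    }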
| @@ -1844,7 +2490,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
| 1844 | if (length > BUF_MAX_DATA_SIZE) | 2490 | if (length > BUF_MAX_DATA_SIZE) | 
| 1845 | goto out; | 2491 | goto out; | 
| 1846 | 2492 | ||
| 1847 | event = rb_reserve_next_event(cpu_buffer, length); | 2493 | event = rb_reserve_next_event(buffer, cpu_buffer, length); | 
| 1848 | if (!event) | 2494 | if (!event) | 
| 1849 | goto out; | 2495 | goto out; | 
| 1850 | 2496 | ||
| @@ -1865,9 +2511,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write); | |||
| 1865 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 2511 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 
| 1866 | { | 2512 | { | 
| 1867 | struct buffer_page *reader = cpu_buffer->reader_page; | 2513 | struct buffer_page *reader = cpu_buffer->reader_page; | 
| 1868 | struct buffer_page *head = cpu_buffer->head_page; | 2514 | struct buffer_page *head = rb_set_head_page(cpu_buffer); | 
| 1869 | struct buffer_page *commit = cpu_buffer->commit_page; | 2515 | struct buffer_page *commit = cpu_buffer->commit_page; | 
| 1870 | 2516 | ||
| 2517 | /* In case of error, head will be NULL */ | ||
| 2518 | if (unlikely(!head)) | ||
| 2519 | return 1; | ||
| 2520 | |||
| 1871 | return reader->read == rb_page_commit(reader) && | 2521 | return reader->read == rb_page_commit(reader) && | 
| 1872 | (commit == reader || | 2522 | (commit == reader || | 
| 1873 | (commit == head && | 2523 | (commit == head && | 
| @@ -1958,7 +2608,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | |||
| 1958 | return 0; | 2608 | return 0; | 
| 1959 | 2609 | ||
| 1960 | cpu_buffer = buffer->buffers[cpu]; | 2610 | cpu_buffer = buffer->buffers[cpu]; | 
| 1961 | ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) | 2611 | ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) | 
| 1962 | - cpu_buffer->read; | 2612 | - cpu_buffer->read; | 
| 1963 | 2613 | ||
| 1964 | return ret; | 2614 | return ret; | 
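For example, if a CPU buffer has recorded 1000 entries, 200 of them were overwritten before being read and 300 have already been consumed, the function reports 1000 - 200 - 300 = 500 readable events.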
| @@ -1979,33 +2629,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
| 1979 | return 0; | 2629 | return 0; | 
| 1980 | 2630 | ||
| 1981 | cpu_buffer = buffer->buffers[cpu]; | 2631 | cpu_buffer = buffer->buffers[cpu]; | 
| 1982 | ret = cpu_buffer->overrun; | 2632 | ret = local_read(&cpu_buffer->overrun); | 
| 1983 | 2633 | ||
| 1984 | return ret; | 2634 | return ret; | 
| 1985 | } | 2635 | } | 
| 1986 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 2636 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 
| 1987 | 2637 | ||
| 1988 | /** | 2638 | /** | 
| 1989 | * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped | ||
| 1990 | * @buffer: The ring buffer | ||
| 1991 | * @cpu: The per CPU buffer to get the number of overruns from | ||
| 1992 | */ | ||
| 1993 | unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) | ||
| 1994 | { | ||
| 1995 | struct ring_buffer_per_cpu *cpu_buffer; | ||
| 1996 | unsigned long ret; | ||
| 1997 | |||
| 1998 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
| 1999 | return 0; | ||
| 2000 | |||
| 2001 | cpu_buffer = buffer->buffers[cpu]; | ||
| 2002 | ret = cpu_buffer->nmi_dropped; | ||
| 2003 | |||
| 2004 | return ret; | ||
| 2005 | } | ||
| 2006 | EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); | ||
| 2007 | |||
| 2008 | /** | ||
| 2009 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | 2639 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | 
| 2010 | * @buffer: The ring buffer | 2640 | * @buffer: The ring buffer | 
| 2011 | * @cpu: The per CPU buffer to get the number of overruns from | 2641 | * @cpu: The per CPU buffer to get the number of overruns from | 
| @@ -2020,7 +2650,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
| 2020 | return 0; | 2650 | return 0; | 
| 2021 | 2651 | ||
| 2022 | cpu_buffer = buffer->buffers[cpu]; | 2652 | cpu_buffer = buffer->buffers[cpu]; | 
| 2023 | ret = cpu_buffer->commit_overrun; | 2653 | ret = local_read(&cpu_buffer->commit_overrun); | 
| 2024 | 2654 | ||
| 2025 | return ret; | 2655 | return ret; | 
| 2026 | } | 2656 | } | 
| @@ -2043,7 +2673,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
| 2043 | for_each_buffer_cpu(buffer, cpu) { | 2673 | for_each_buffer_cpu(buffer, cpu) { | 
| 2044 | cpu_buffer = buffer->buffers[cpu]; | 2674 | cpu_buffer = buffer->buffers[cpu]; | 
| 2045 | entries += (local_read(&cpu_buffer->entries) - | 2675 | entries += (local_read(&cpu_buffer->entries) - | 
| 2046 | cpu_buffer->overrun) - cpu_buffer->read; | 2676 | local_read(&cpu_buffer->overrun)) - cpu_buffer->read; | 
| 2047 | } | 2677 | } | 
| 2048 | 2678 | ||
| 2049 | return entries; | 2679 | return entries; | 
| @@ -2051,7 +2681,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
| 2051 | EXPORT_SYMBOL_GPL(ring_buffer_entries); | 2681 | EXPORT_SYMBOL_GPL(ring_buffer_entries); | 
| 2052 | 2682 | ||
| 2053 | /** | 2683 | /** | 
| 2054 | * ring_buffer_overrun_cpu - get the number of overruns in buffer | 2684 | * ring_buffer_overruns - get the number of overruns in buffer | 
| 2055 | * @buffer: The ring buffer | 2685 | * @buffer: The ring buffer | 
| 2056 | * | 2686 | * | 
| 2057 | * Returns the total number of overruns in the ring buffer | 2687 | * Returns the total number of overruns in the ring buffer | 
| @@ -2066,7 +2696,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer) | |||
| 2066 | /* if you care about this being correct, lock the buffer */ | 2696 | /* if you care about this being correct, lock the buffer */ | 
| 2067 | for_each_buffer_cpu(buffer, cpu) { | 2697 | for_each_buffer_cpu(buffer, cpu) { | 
| 2068 | cpu_buffer = buffer->buffers[cpu]; | 2698 | cpu_buffer = buffer->buffers[cpu]; | 
| 2069 | overruns += cpu_buffer->overrun; | 2699 | overruns += local_read(&cpu_buffer->overrun); | 
| 2070 | } | 2700 | } | 
| 2071 | 2701 | ||
| 2072 | return overruns; | 2702 | return overruns; | 
| @@ -2079,8 +2709,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
| 2079 | 2709 | ||
| 2080 | /* Iterator usage is expected to have record disabled */ | 2710 | /* Iterator usage is expected to have record disabled */ | 
| 2081 | if (list_empty(&cpu_buffer->reader_page->list)) { | 2711 | if (list_empty(&cpu_buffer->reader_page->list)) { | 
| 2082 | iter->head_page = cpu_buffer->head_page; | 2712 | iter->head_page = rb_set_head_page(cpu_buffer); | 
| 2083 | iter->head = cpu_buffer->head_page->read; | 2713 | if (unlikely(!iter->head_page)) | 
| 2714 | return; | ||
| 2715 | iter->head = iter->head_page->read; | ||
| 2084 | } else { | 2716 | } else { | 
| 2085 | iter->head_page = cpu_buffer->reader_page; | 2717 | iter->head_page = cpu_buffer->reader_page; | 
| 2086 | iter->head = cpu_buffer->reader_page->read; | 2718 | iter->head = cpu_buffer->reader_page->read; | 
| @@ -2197,6 +2829,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2197 | struct buffer_page *reader = NULL; | 2829 | struct buffer_page *reader = NULL; | 
| 2198 | unsigned long flags; | 2830 | unsigned long flags; | 
| 2199 | int nr_loops = 0; | 2831 | int nr_loops = 0; | 
| 2832 | int ret; | ||
| 2200 | 2833 | ||
| 2201 | local_irq_save(flags); | 2834 | local_irq_save(flags); | 
| 2202 | __raw_spin_lock(&cpu_buffer->lock); | 2835 | __raw_spin_lock(&cpu_buffer->lock); | 
| @@ -2230,30 +2863,56 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2230 | goto out; | 2863 | goto out; | 
| 2231 | 2864 | ||
| 2232 | /* | 2865 | /* | 
| 2233 | * Splice the empty reader page into the list around the head. | ||
| 2234 | * Reset the reader page to size zero. | 2866 | * Reset the reader page to size zero. | 
| 2235 | */ | 2867 | */ | 
| 2868 | local_set(&cpu_buffer->reader_page->write, 0); | ||
| 2869 | local_set(&cpu_buffer->reader_page->entries, 0); | ||
| 2870 | local_set(&cpu_buffer->reader_page->page->commit, 0); | ||
| 2236 | 2871 | ||
| 2237 | reader = cpu_buffer->head_page; | 2872 | spin: | 
| 2873 | /* | ||
| 2874 | * Splice the empty reader page into the list around the head. | ||
| 2875 | */ | ||
| 2876 | reader = rb_set_head_page(cpu_buffer); | ||
| 2238 | cpu_buffer->reader_page->list.next = reader->list.next; | 2877 | cpu_buffer->reader_page->list.next = reader->list.next; | 
| 2239 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 2878 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 
| 2240 | 2879 | ||
| 2241 | local_set(&cpu_buffer->reader_page->write, 0); | 2880 | /* | 
| 2242 | local_set(&cpu_buffer->reader_page->entries, 0); | 2881 | * cpu_buffer->pages just needs to point to the buffer, it | 
| 2243 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2882 | * has no specific buffer page to point to. Let's move it out | 
| 2883 | * of our way so we don't accidentally swap it. | ||
| 2884 | */ | ||
| 2885 | cpu_buffer->pages = reader->list.prev; | ||
| 2244 | 2886 | ||
| 2245 | /* Make the reader page now replace the head */ | 2887 | /* The reader page will be pointing to the new head */ | 
| 2246 | reader->list.prev->next = &cpu_buffer->reader_page->list; | 2888 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); | 
| 2247 | reader->list.next->prev = &cpu_buffer->reader_page->list; | 2889 | |
| 2890 | /* | ||
| 2891 | * Here's the tricky part. | ||
| 2892 | * | ||
| 2893 | * We need to move the pointer past the header page. | ||
| 2894 | * But we can only do that if a writer is not currently | ||
| 2895 | * moving it. The page before the header page has the | ||
| 2896 | * flag bit '1' set if it is pointing to the page we want, | ||
| 2897 | * but if the writer is in the process of moving it | ||
| 2898 | * then it will be '2' or already moved '0'. | ||
| 2899 | */ | ||
| 2900 | |||
| 2901 | ret = rb_head_page_replace(reader, cpu_buffer->reader_page); | ||
| 2248 | 2902 | ||
| 2249 | /* | 2903 | /* | 
| 2250 | * If the tail is on the reader, then we must set the head | 2904 | * If we did not convert it, then we must try again. | 
| 2251 | * to the inserted page, otherwise we set it one before. | ||
| 2252 | */ | 2905 | */ | 
| 2253 | cpu_buffer->head_page = cpu_buffer->reader_page; | 2906 | if (!ret) | 
| 2907 | goto spin; | ||
| 2254 | 2908 | ||
| 2255 | if (cpu_buffer->commit_page != reader) | 2909 | /* | 
| 2256 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | 2910 | * Yeah! We succeeded in replacing the page. | 
| 2911 | * | ||
| 2912 | * Now make the new head point back to the reader page. | ||
| 2913 | */ | ||
| 2914 | reader->list.next->prev = &cpu_buffer->reader_page->list; | ||
| 2915 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | ||
| 2257 | 2916 | ||
| 2258 | /* Finally update the reader page to the new head */ | 2917 | /* Finally update the reader page to the new head */ | 
| 2259 | cpu_buffer->reader_page = reader; | 2918 | cpu_buffer->reader_page = reader; | 
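The spin/replace sequence above is the heart of the lockless reader: because buffer pages sit on cache-line boundaries, the low bits of the list pointer leading to the head page are free to carry a flag, and swapping the reader page in becomes a single compare-and-swap on that tagged pointer. A user-space model of the idea (the flag value and names are assumptions for illustration):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_HEAD_FLAG 1UL      /* assumed: "this pointer targets the head page" */

    struct page_model {
            _Atomic(uintptr_t) next;        /* page pointer with flag bits in the LSBs */
    };

    /* Swap new_head in for old_head by updating the tagged pointer in the
     * page before the head.  Fails, and the caller retries, if a writer
     * cleared or changed the flag while the reader was preparing the swap. */
    bool head_page_replace(struct page_model *prev,
                           struct page_model *old_head, struct page_model *new_head)
    {
            uintptr_t expected = (uintptr_t)old_head | PAGE_HEAD_FLAG;
            uintptr_t desired  = (uintptr_t)new_head | PAGE_HEAD_FLAG;

            return atomic_compare_exchange_strong(&prev->next, &expected, desired);
    }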
| @@ -2282,8 +2941,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2282 | 2941 | ||
| 2283 | event = rb_reader_event(cpu_buffer); | 2942 | event = rb_reader_event(cpu_buffer); | 
| 2284 | 2943 | ||
| 2285 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 2944 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 
| 2286 | || rb_discarded_event(event)) | ||
| 2287 | cpu_buffer->read++; | 2945 | cpu_buffer->read++; | 
| 2288 | 2946 | ||
| 2289 | rb_update_read_stamp(cpu_buffer, event); | 2947 | rb_update_read_stamp(cpu_buffer, event); | 
| @@ -2337,15 +2995,12 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
| 2337 | } | 2995 | } | 
| 2338 | 2996 | ||
| 2339 | static struct ring_buffer_event * | 2997 | static struct ring_buffer_event * | 
| 2340 | rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 2998 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) | 
| 2341 | { | 2999 | { | 
| 2342 | struct ring_buffer_per_cpu *cpu_buffer; | ||
| 2343 | struct ring_buffer_event *event; | 3000 | struct ring_buffer_event *event; | 
| 2344 | struct buffer_page *reader; | 3001 | struct buffer_page *reader; | 
| 2345 | int nr_loops = 0; | 3002 | int nr_loops = 0; | 
| 2346 | 3003 | ||
| 2347 | cpu_buffer = buffer->buffers[cpu]; | ||
| 2348 | |||
| 2349 | again: | 3004 | again: | 
| 2350 | /* | 3005 | /* | 
| 2351 | * We repeat when a timestamp is encountered. It is possible | 3006 | * We repeat when a timestamp is encountered. It is possible | 
| @@ -2374,7 +3029,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2374 | * the box. Return the padding, and we will release | 3029 | * the box. Return the padding, and we will release | 
| 2375 | * the current locks, and try again. | 3030 | * the current locks, and try again. | 
| 2376 | */ | 3031 | */ | 
| 2377 | rb_advance_reader(cpu_buffer); | ||
| 2378 | return event; | 3032 | return event; | 
| 2379 | 3033 | ||
| 2380 | case RINGBUF_TYPE_TIME_EXTEND: | 3034 | case RINGBUF_TYPE_TIME_EXTEND: | 
| @@ -2390,7 +3044,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2390 | case RINGBUF_TYPE_DATA: | 3044 | case RINGBUF_TYPE_DATA: | 
| 2391 | if (ts) { | 3045 | if (ts) { | 
| 2392 | *ts = cpu_buffer->read_stamp + event->time_delta; | 3046 | *ts = cpu_buffer->read_stamp + event->time_delta; | 
| 2393 | ring_buffer_normalize_time_stamp(buffer, | 3047 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | 
| 2394 | cpu_buffer->cpu, ts); | 3048 | cpu_buffer->cpu, ts); | 
| 2395 | } | 3049 | } | 
| 2396 | return event; | 3050 | return event; | 
| @@ -2477,7 +3131,7 @@ static inline int rb_ok_to_lock(void) | |||
| 2477 | * buffer too. A one time deal is all you get from reading | 3131 | * buffer too. A one time deal is all you get from reading | 
| 2478 | * the ring buffer from an NMI. | 3132 | * the ring buffer from an NMI. | 
| 2479 | */ | 3133 | */ | 
| 2480 | if (likely(!in_nmi() && !oops_in_progress)) | 3134 | if (likely(!in_nmi())) | 
| 2481 | return 1; | 3135 | return 1; | 
| 2482 | 3136 | ||
| 2483 | tracing_off_permanent(); | 3137 | tracing_off_permanent(); | 
| @@ -2509,15 +3163,15 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2509 | local_irq_save(flags); | 3163 | local_irq_save(flags); | 
| 2510 | if (dolock) | 3164 | if (dolock) | 
| 2511 | spin_lock(&cpu_buffer->reader_lock); | 3165 | spin_lock(&cpu_buffer->reader_lock); | 
| 2512 | event = rb_buffer_peek(buffer, cpu, ts); | 3166 | event = rb_buffer_peek(cpu_buffer, ts); | 
| 3167 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | ||
| 3168 | rb_advance_reader(cpu_buffer); | ||
| 2513 | if (dolock) | 3169 | if (dolock) | 
| 2514 | spin_unlock(&cpu_buffer->reader_lock); | 3170 | spin_unlock(&cpu_buffer->reader_lock); | 
| 2515 | local_irq_restore(flags); | 3171 | local_irq_restore(flags); | 
| 2516 | 3172 | ||
| 2517 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3173 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 
| 2518 | cpu_relax(); | ||
| 2519 | goto again; | 3174 | goto again; | 
| 2520 | } | ||
| 2521 | 3175 | ||
| 2522 | return event; | 3176 | return event; | 
| 2523 | } | 3177 | } | 
| @@ -2542,10 +3196,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2542 | event = rb_iter_peek(iter, ts); | 3196 | event = rb_iter_peek(iter, ts); | 
| 2543 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3197 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 
| 2544 | 3198 | ||
| 2545 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3199 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 
| 2546 | cpu_relax(); | ||
| 2547 | goto again; | 3200 | goto again; | 
| 2548 | } | ||
| 2549 | 3201 | ||
| 2550 | return event; | 3202 | return event; | 
| 2551 | } | 3203 | } | 
| @@ -2580,13 +3232,10 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2580 | if (dolock) | 3232 | if (dolock) | 
| 2581 | spin_lock(&cpu_buffer->reader_lock); | 3233 | spin_lock(&cpu_buffer->reader_lock); | 
| 2582 | 3234 | ||
| 2583 | event = rb_buffer_peek(buffer, cpu, ts); | 3235 | event = rb_buffer_peek(cpu_buffer, ts); | 
| 2584 | if (!event) | 3236 | if (event) | 
| 2585 | goto out_unlock; | 3237 | rb_advance_reader(cpu_buffer); | 
| 2586 | |||
| 2587 | rb_advance_reader(cpu_buffer); | ||
| 2588 | 3238 | ||
| 2589 | out_unlock: | ||
| 2590 | if (dolock) | 3239 | if (dolock) | 
| 2591 | spin_unlock(&cpu_buffer->reader_lock); | 3240 | spin_unlock(&cpu_buffer->reader_lock); | 
| 2592 | local_irq_restore(flags); | 3241 | local_irq_restore(flags); | 
| @@ -2594,10 +3243,8 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2594 | out: | 3243 | out: | 
| 2595 | preempt_enable(); | 3244 | preempt_enable(); | 
| 2596 | 3245 | ||
| 2597 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3246 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 
| 2598 | cpu_relax(); | ||
| 2599 | goto again; | 3247 | goto again; | 
| 2600 | } | ||
| 2601 | 3248 | ||
| 2602 | return event; | 3249 | return event; | 
| 2603 | } | 3250 | } | 
| @@ -2677,21 +3324,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2677 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3324 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 
| 2678 | unsigned long flags; | 3325 | unsigned long flags; | 
| 2679 | 3326 | ||
| 2680 | again: | ||
| 2681 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3327 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 
| 3328 | again: | ||
| 2682 | event = rb_iter_peek(iter, ts); | 3329 | event = rb_iter_peek(iter, ts); | 
| 2683 | if (!event) | 3330 | if (!event) | 
| 2684 | goto out; | 3331 | goto out; | 
| 2685 | 3332 | ||
| 3333 | if (event->type_len == RINGBUF_TYPE_PADDING) | ||
| 3334 | goto again; | ||
| 3335 | |||
| 2686 | rb_advance_iter(iter); | 3336 | rb_advance_iter(iter); | 
| 2687 | out: | 3337 | out: | 
| 2688 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3338 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 
| 2689 | 3339 | ||
| 2690 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | ||
| 2691 | cpu_relax(); | ||
| 2692 | goto again; | ||
| 2693 | } | ||
| 2694 | |||
| 2695 | return event; | 3340 | return event; | 
| 2696 | } | 3341 | } | 
| 2697 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 3342 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 
| @@ -2709,8 +3354,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size); | |||
| 2709 | static void | 3354 | static void | 
| 2710 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | 3355 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | 
| 2711 | { | 3356 | { | 
| 3357 | rb_head_page_deactivate(cpu_buffer); | ||
| 3358 | |||
| 2712 | cpu_buffer->head_page | 3359 | cpu_buffer->head_page | 
| 2713 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 3360 | = list_entry(cpu_buffer->pages, struct buffer_page, list); | 
| 2714 | local_set(&cpu_buffer->head_page->write, 0); | 3361 | local_set(&cpu_buffer->head_page->write, 0); | 
| 2715 | local_set(&cpu_buffer->head_page->entries, 0); | 3362 | local_set(&cpu_buffer->head_page->entries, 0); | 
| 2716 | local_set(&cpu_buffer->head_page->page->commit, 0); | 3363 | local_set(&cpu_buffer->head_page->page->commit, 0); | 
| @@ -2726,16 +3373,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2726 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 3373 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 
| 2727 | cpu_buffer->reader_page->read = 0; | 3374 | cpu_buffer->reader_page->read = 0; | 
| 2728 | 3375 | ||
| 2729 | cpu_buffer->nmi_dropped = 0; | 3376 | local_set(&cpu_buffer->commit_overrun, 0); | 
| 2730 | cpu_buffer->commit_overrun = 0; | 3377 | local_set(&cpu_buffer->overrun, 0); | 
| 2731 | cpu_buffer->overrun = 0; | ||
| 2732 | cpu_buffer->read = 0; | ||
| 2733 | local_set(&cpu_buffer->entries, 0); | 3378 | local_set(&cpu_buffer->entries, 0); | 
| 2734 | local_set(&cpu_buffer->committing, 0); | 3379 | local_set(&cpu_buffer->committing, 0); | 
| 2735 | local_set(&cpu_buffer->commits, 0); | 3380 | local_set(&cpu_buffer->commits, 0); | 
| 3381 | cpu_buffer->read = 0; | ||
| 2736 | 3382 | ||
| 2737 | cpu_buffer->write_stamp = 0; | 3383 | cpu_buffer->write_stamp = 0; | 
| 2738 | cpu_buffer->read_stamp = 0; | 3384 | cpu_buffer->read_stamp = 0; | 
| 3385 | |||
| 3386 | rb_head_page_activate(cpu_buffer); | ||
| 2739 | } | 3387 | } | 
| 2740 | 3388 | ||
| 2741 | /** | 3389 | /** | 
| @@ -2755,12 +3403,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
| 2755 | 3403 | ||
| 2756 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3404 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 
| 2757 | 3405 | ||
| 3406 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) | ||
| 3407 | goto out; | ||
| 3408 | |||
| 2758 | __raw_spin_lock(&cpu_buffer->lock); | 3409 | __raw_spin_lock(&cpu_buffer->lock); | 
| 2759 | 3410 | ||
| 2760 | rb_reset_cpu(cpu_buffer); | 3411 | rb_reset_cpu(cpu_buffer); | 
| 2761 | 3412 | ||
| 2762 | __raw_spin_unlock(&cpu_buffer->lock); | 3413 | __raw_spin_unlock(&cpu_buffer->lock); | 
| 2763 | 3414 | ||
| 3415 | out: | ||
| 2764 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3416 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 
| 2765 | 3417 | ||
| 2766 | atomic_dec(&cpu_buffer->record_disabled); | 3418 | atomic_dec(&cpu_buffer->record_disabled); | 
| @@ -2843,6 +3495,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | |||
| 2843 | } | 3495 | } | 
| 2844 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 3496 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 
| 2845 | 3497 | ||
| 3498 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
| 2846 | /** | 3499 | /** | 
| 2847 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers | 3500 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers | 
| 2848 | * @buffer_a: One buffer to swap with | 3501 | * @buffer_a: One buffer to swap with | 
| @@ -2897,20 +3550,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
| 2897 | atomic_inc(&cpu_buffer_a->record_disabled); | 3550 | atomic_inc(&cpu_buffer_a->record_disabled); | 
| 2898 | atomic_inc(&cpu_buffer_b->record_disabled); | 3551 | atomic_inc(&cpu_buffer_b->record_disabled); | 
| 2899 | 3552 | ||
| 3553 | ret = -EBUSY; | ||
| 3554 | if (local_read(&cpu_buffer_a->committing)) | ||
| 3555 | goto out_dec; | ||
| 3556 | if (local_read(&cpu_buffer_b->committing)) | ||
| 3557 | goto out_dec; | ||
| 3558 | |||
| 2900 | buffer_a->buffers[cpu] = cpu_buffer_b; | 3559 | buffer_a->buffers[cpu] = cpu_buffer_b; | 
| 2901 | buffer_b->buffers[cpu] = cpu_buffer_a; | 3560 | buffer_b->buffers[cpu] = cpu_buffer_a; | 
| 2902 | 3561 | ||
| 2903 | cpu_buffer_b->buffer = buffer_a; | 3562 | cpu_buffer_b->buffer = buffer_a; | 
| 2904 | cpu_buffer_a->buffer = buffer_b; | 3563 | cpu_buffer_a->buffer = buffer_b; | 
| 2905 | 3564 | ||
| 3565 | ret = 0; | ||
| 3566 | |||
| 3567 | out_dec: | ||
| 2906 | atomic_dec(&cpu_buffer_a->record_disabled); | 3568 | atomic_dec(&cpu_buffer_a->record_disabled); | 
| 2907 | atomic_dec(&cpu_buffer_b->record_disabled); | 3569 | atomic_dec(&cpu_buffer_b->record_disabled); | 
| 2908 | |||
| 2909 | ret = 0; | ||
| 2910 | out: | 3570 | out: | 
| 2911 | return ret; | 3571 | return ret; | 
| 2912 | } | 3572 | } | 
| 2913 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 3573 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 
| 3574 | #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ | ||
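With the swap now able to fail while a commit is in flight, callers are expected to handle -EBUSY. A hypothetical snapshot helper (illustrative names; not part of this patch) might retry briefly:

    #include <linux/ring_buffer.h>
    #include <linux/errno.h>

    #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
    /* Illustrative only: swap one CPU's live buffer with a spare buffer,
     * retrying a few times since the swap now fails with -EBUSY while a
     * commit is in progress on that CPU. */
    static int snapshot_cpu(struct ring_buffer *spare, struct ring_buffer *live,
                            int cpu)
    {
            int ret = -EBUSY;
            int tries;

            for (tries = 0; tries < 10 && ret == -EBUSY; tries++)
                    ret = ring_buffer_swap_cpu(spare, live, cpu);

            return ret;     /* 0 on success; -EBUSY if commits never ceased */
    }
    #endif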
| 2914 | 3575 | ||
| 2915 | /** | 3576 | /** | 
| 2916 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 3577 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 
| @@ -3083,7 +3744,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 3083 | read = 0; | 3744 | read = 0; | 
| 3084 | } else { | 3745 | } else { | 
| 3085 | /* update the entry counter */ | 3746 | /* update the entry counter */ | 
| 3086 | cpu_buffer->read += local_read(&reader->entries); | 3747 | cpu_buffer->read += rb_page_entries(reader); | 
| 3087 | 3748 | ||
| 3088 | /* swap the pages */ | 3749 | /* swap the pages */ | 
| 3089 | rb_init_page(bpage); | 3750 | rb_init_page(bpage); | 
| @@ -3104,6 +3765,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 3104 | } | 3765 | } | 
| 3105 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | 3766 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | 
| 3106 | 3767 | ||
| 3768 | #ifdef CONFIG_TRACING | ||
| 3107 | static ssize_t | 3769 | static ssize_t | 
| 3108 | rb_simple_read(struct file *filp, char __user *ubuf, | 3770 | rb_simple_read(struct file *filp, char __user *ubuf, | 
| 3109 | size_t cnt, loff_t *ppos) | 3771 | size_t cnt, loff_t *ppos) | 
| @@ -3171,6 +3833,7 @@ static __init int rb_init_debugfs(void) | |||
| 3171 | } | 3833 | } | 
| 3172 | 3834 | ||
| 3173 | fs_initcall(rb_init_debugfs); | 3835 | fs_initcall(rb_init_debugfs); | 
| 3836 | #endif | ||
| 3174 | 3837 | ||
| 3175 | #ifdef CONFIG_HOTPLUG_CPU | 3838 | #ifdef CONFIG_HOTPLUG_CPU | 
| 3176 | static int rb_cpu_notify(struct notifier_block *self, | 3839 | static int rb_cpu_notify(struct notifier_block *self, | 
