aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Markus Metzger <markus.t.metzger@intel.com> 2009-09-15 07:00:23 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-09-18 14:43:20 -0400
commit 5622f295b53fb60dbf9bed3e2c89d182490a8b7f (patch)
tree 8279554bddd1607d53dc06e97f4b5a1d0c085ccd
parent 4b77a7297795229eca96c41e1709a3c87909fabe (diff)
x86, perf_counter, bts: Optimize BTS overflow handling
Draining the BTS buffer on a buffer overflow interrupt takes too long resulting in a kernel lockup when tracing the kernel.

Restructure perf_counter sampling into sample creation and sample output.

Prepare a single reference sample for BTS sampling and update the from and to address fields when draining the BTS buffer. Drain the entire BTS buffer between a single perf_output_begin() / perf_output_end() pair.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090915130023.A16204@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/kernel/cpu/perf_counter.c 60
-rw-r--r-- include/linux/perf_counter.h 68
-rw-r--r-- kernel/perf_counter.c 312
3 files changed, 266 insertions, 174 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f9cd0849bd42..6a0e71b38126 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -36,10 +36,10 @@ static u64 perf_counter_mask __read_mostly;
36#define BTS_RECORD_SIZE 24 36#define BTS_RECORD_SIZE 24
37 37
38/* The size of a per-cpu BTS buffer in bytes: */ 38/* The size of a per-cpu BTS buffer in bytes: */
39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) 39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048)
40 40
41/* The BTS overflow threshold in bytes from the end of the buffer: */ 41/* The BTS overflow threshold in bytes from the end of the buffer: */
42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) 42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128)
43 43
44 44
45/* 45/*
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
1488 local_irq_restore(flags); 1488 local_irq_restore(flags);
1489} 1489}
1490 1490
1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, 1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
1492 struct perf_sample_data *data)
1493{ 1492{
1494 struct debug_store *ds = cpuc->ds; 1493 struct debug_store *ds = cpuc->ds;
1495 struct bts_record { 1494 struct bts_record {
@@ -1498,8 +1497,11 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
1498 u64 flags; 1497 u64 flags;
1499 }; 1498 };
1500 struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; 1499 struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
1501 unsigned long orig_ip = data->regs->ip;
1502 struct bts_record *at, *top; 1500 struct bts_record *at, *top;
1501 struct perf_output_handle handle;
1502 struct perf_event_header header;
1503 struct perf_sample_data data;
1504 struct pt_regs regs;
1503 1505
1504 if (!counter) 1506 if (!counter)
1505 return; 1507 return;
@@ -1510,19 +1512,38 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
1510 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 1512 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1511 top = (struct bts_record *)(unsigned long)ds->bts_index; 1513 top = (struct bts_record *)(unsigned long)ds->bts_index;
1512 1514
1515 if (top <= at)
1516 return;
1517
1513 ds->bts_index = ds->bts_buffer_base; 1518 ds->bts_index = ds->bts_buffer_base;
1514 1519
1520
1521 data.period = counter->hw.last_period;
1522 data.addr = 0;
1523 regs.ip = 0;
1524
1525 /*
1526 * Prepare a generic sample, i.e. fill in the invariant fields.
1527 * We will overwrite the from and to address before we output
1528 * the sample.
1529 */
1530 perf_prepare_sample(&header, &data, counter, &regs);
1531
1532 if (perf_output_begin(&handle, counter,
1533 header.size * (top - at), 1, 1))
1534 return;
1535
1515 for (; at < top; at++) { 1536 for (; at < top; at++) {
1516 data->regs->ip = at->from; 1537 data.ip = at->from;
1517 data->addr = at->to; 1538 data.addr = at->to;
1518 1539
1519 perf_counter_output(counter, 1, data); 1540 perf_output_sample(&handle, &header, &data, counter);
1520 } 1541 }
1521 1542
1522 data->regs->ip = orig_ip; 1543 perf_output_end(&handle);
1523 data->addr = 0;
1524 1544
1525 /* There's new data available. */ 1545 /* There's new data available. */
1546 counter->hw.interrupts++;
1526 counter->pending_kill = POLL_IN; 1547 counter->pending_kill = POLL_IN;
1527} 1548}
1528 1549
@@ -1552,13 +1573,9 @@ static void x86_pmu_disable(struct perf_counter *counter)
1552 x86_perf_counter_update(counter, hwc, idx); 1573 x86_perf_counter_update(counter, hwc, idx);
1553 1574
1554 /* Drain the remaining BTS records. */ 1575 /* Drain the remaining BTS records. */
1555 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 1576 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1556 struct perf_sample_data data; 1577 intel_pmu_drain_bts_buffer(cpuc);
1557 struct pt_regs regs;
1558 1578
1559 data.regs = &regs;
1560 intel_pmu_drain_bts_buffer(cpuc, &data);
1561 }
1562 cpuc->counters[idx] = NULL; 1579 cpuc->counters[idx] = NULL;
1563 clear_bit(idx, cpuc->used_mask); 1580 clear_bit(idx, cpuc->used_mask);
1564 1581
@@ -1619,7 +1636,6 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
1619 int idx, handled = 0; 1636 int idx, handled = 0;
1620 u64 val; 1637 u64 val;
1621 1638
1622 data.regs = regs;
1623 data.addr = 0; 1639 data.addr = 0;
1624 1640
1625 cpuc = &__get_cpu_var(cpu_hw_counters); 1641 cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1644,7 +1660,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
1644 if (!x86_perf_counter_set_period(counter, hwc, idx)) 1660 if (!x86_perf_counter_set_period(counter, hwc, idx))
1645 continue; 1661 continue;
1646 1662
1647 if (perf_counter_overflow(counter, 1, &data)) 1663 if (perf_counter_overflow(counter, 1, &data, regs))
1648 p6_pmu_disable_counter(hwc, idx); 1664 p6_pmu_disable_counter(hwc, idx);
1649 } 1665 }
1650 1666
@@ -1665,13 +1681,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1665 int bit, loops; 1681 int bit, loops;
1666 u64 ack, status; 1682 u64 ack, status;
1667 1683
1668 data.regs = regs;
1669 data.addr = 0; 1684 data.addr = 0;
1670 1685
1671 cpuc = &__get_cpu_var(cpu_hw_counters); 1686 cpuc = &__get_cpu_var(cpu_hw_counters);
1672 1687
1673 perf_disable(); 1688 perf_disable();
1674 intel_pmu_drain_bts_buffer(cpuc, &data); 1689 intel_pmu_drain_bts_buffer(cpuc);
1675 status = intel_pmu_get_status(); 1690 status = intel_pmu_get_status();
1676 if (!status) { 1691 if (!status) {
1677 perf_enable(); 1692 perf_enable();
@@ -1702,7 +1717,7 @@ again:
1702 1717
1703 data.period = counter->hw.last_period; 1718 data.period = counter->hw.last_period;
1704 1719
1705 if (perf_counter_overflow(counter, 1, &data)) 1720 if (perf_counter_overflow(counter, 1, &data, regs))
1706 intel_pmu_disable_counter(&counter->hw, bit); 1721 intel_pmu_disable_counter(&counter->hw, bit);
1707 } 1722 }
1708 1723
@@ -1729,7 +1744,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1729 int idx, handled = 0; 1744 int idx, handled = 0;
1730 u64 val; 1745 u64 val;
1731 1746
1732 data.regs = regs;
1733 data.addr = 0; 1747 data.addr = 0;
1734 1748
1735 cpuc = &__get_cpu_var(cpu_hw_counters); 1749 cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1754,7 +1768,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1754 if (!x86_perf_counter_set_period(counter, hwc, idx)) 1768 if (!x86_perf_counter_set_period(counter, hwc, idx))
1755 continue; 1769 continue;
1756 1770
1757 if (perf_counter_overflow(counter, 1, &data)) 1771 if (perf_counter_overflow(counter, 1, &data, regs))
1758 amd_pmu_disable_counter(hwc, idx); 1772 amd_pmu_disable_counter(hwc, idx);
1759 } 1773 }
1760 1774
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6c1ef72ea501..c7375f97aa19 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -691,6 +691,17 @@ struct perf_cpu_context {
691 int recursion[4]; 691 int recursion[4];
692}; 692};
693 693
694struct perf_output_handle {
695 struct perf_counter *counter;
696 struct perf_mmap_data *data;
697 unsigned long head;
698 unsigned long offset;
699 int nmi;
700 int sample;
701 int locked;
702 unsigned long flags;
703};
704
694#ifdef CONFIG_PERF_COUNTERS 705#ifdef CONFIG_PERF_COUNTERS
695 706
696/* 707/*
@@ -722,16 +733,38 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
722extern void perf_counter_update_userpage(struct perf_counter *counter); 733extern void perf_counter_update_userpage(struct perf_counter *counter);
723 734
724struct perf_sample_data { 735struct perf_sample_data {
725 struct pt_regs *regs; 736 u64 type;
737
738 u64 ip;
739 struct {
740 u32 pid;
741 u32 tid;
742 } tid_entry;
743 u64 time;
726 u64 addr; 744 u64 addr;
745 u64 id;
746 u64 stream_id;
747 struct {
748 u32 cpu;
749 u32 reserved;
750 } cpu_entry;
727 u64 period; 751 u64 period;
752 struct perf_callchain_entry *callchain;
728 struct perf_raw_record *raw; 753 struct perf_raw_record *raw;
729}; 754};
730 755
756extern void perf_output_sample(struct perf_output_handle *handle,
757 struct perf_event_header *header,
758 struct perf_sample_data *data,
759 struct perf_counter *counter);
760extern void perf_prepare_sample(struct perf_event_header *header,
761 struct perf_sample_data *data,
762 struct perf_counter *counter,
763 struct pt_regs *regs);
764
731extern int perf_counter_overflow(struct perf_counter *counter, int nmi, 765extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
732 struct perf_sample_data *data); 766 struct perf_sample_data *data,
733extern void perf_counter_output(struct perf_counter *counter, int nmi, 767 struct pt_regs *regs);
734 struct perf_sample_data *data);
735 768
736/* 769/*
737 * Return 1 for a software counter, 0 for a hardware counter 770 * Return 1 for a software counter, 0 for a hardware counter
@@ -781,6 +814,12 @@ extern void perf_tpcounter_event(int event_id, u64 addr, u64 count,
781#define perf_instruction_pointer(regs) instruction_pointer(regs) 814#define perf_instruction_pointer(regs) instruction_pointer(regs)
782#endif 815#endif
783 816
817extern int perf_output_begin(struct perf_output_handle *handle,
818 struct perf_counter *counter, unsigned int size,
819 int nmi, int sample);
820extern void perf_output_end(struct perf_output_handle *handle);
821extern void perf_output_copy(struct perf_output_handle *handle,
822 const void *buf, unsigned int len);
784#else 823#else
785static inline void 824static inline void
786perf_counter_task_sched_in(struct task_struct *task, int cpu) { } 825perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
@@ -807,7 +846,28 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { }
807static inline void perf_counter_comm(struct task_struct *tsk) { } 846static inline void perf_counter_comm(struct task_struct *tsk) { }
808static inline void perf_counter_fork(struct task_struct *tsk) { } 847static inline void perf_counter_fork(struct task_struct *tsk) { }
809static inline void perf_counter_init(void) { } 848static inline void perf_counter_init(void) { }
849
850static inline int
851perf_output_begin(struct perf_output_handle *handle, struct perf_counter *c,
852 unsigned int size, int nmi, int sample) { }
853static inline void perf_output_end(struct perf_output_handle *handle) { }
854static inline void
855perf_output_copy(struct perf_output_handle *handle,
856 const void *buf, unsigned int len) { }
857static inline void
858perf_output_sample(struct perf_output_handle *handle,
859 struct perf_event_header *header,
860 struct perf_sample_data *data,
861 struct perf_counter *counter) { }
862static inline void
863perf_prepare_sample(struct perf_event_header *header,
864 struct perf_sample_data *data,
865 struct perf_counter *counter,
866 struct pt_regs *regs) { }
810#endif 867#endif
811 868
869#define perf_output_put(handle, x) \
870 perf_output_copy((handle), &(x), sizeof(x))
871
812#endif /* __KERNEL__ */ 872#endif /* __KERNEL__ */
813#endif /* _LINUX_PERF_COUNTER_H */ 873#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 29b73b6e8146..215845243a69 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2512,18 +2512,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2512/* 2512/*
2513 * Output 2513 * Output
2514 */ 2514 */
2515
2516struct perf_output_handle {
2517 struct perf_counter *counter;
2518 struct perf_mmap_data *data;
2519 unsigned long head;
2520 unsigned long offset;
2521 int nmi;
2522 int sample;
2523 int locked;
2524 unsigned long flags;
2525};
2526
2527static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, 2515static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
2528 unsigned long offset, unsigned long head) 2516 unsigned long offset, unsigned long head)
2529{ 2517{
@@ -2633,8 +2621,8 @@ out:
2633 local_irq_restore(handle->flags); 2621 local_irq_restore(handle->flags);
2634} 2622}
2635 2623
2636static void perf_output_copy(struct perf_output_handle *handle, 2624void perf_output_copy(struct perf_output_handle *handle,
2637 const void *buf, unsigned int len) 2625 const void *buf, unsigned int len)
2638{ 2626{
2639 unsigned int pages_mask; 2627 unsigned int pages_mask;
2640 unsigned int offset; 2628 unsigned int offset;
@@ -2669,12 +2657,9 @@ static void perf_output_copy(struct perf_output_handle *handle,
2669 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); 2657 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
2670} 2658}
2671 2659
2672#define perf_output_put(handle, x) \ 2660int perf_output_begin(struct perf_output_handle *handle,
2673 perf_output_copy((handle), &(x), sizeof(x)) 2661 struct perf_counter *counter, unsigned int size,
2674 2662 int nmi, int sample)
2675static int perf_output_begin(struct perf_output_handle *handle,
2676 struct perf_counter *counter, unsigned int size,
2677 int nmi, int sample)
2678{ 2663{
2679 struct perf_counter *output_counter; 2664 struct perf_counter *output_counter;
2680 struct perf_mmap_data *data; 2665 struct perf_mmap_data *data;
@@ -2756,7 +2741,7 @@ out:
2756 return -ENOSPC; 2741 return -ENOSPC;
2757} 2742}
2758 2743
2759static void perf_output_end(struct perf_output_handle *handle) 2744void perf_output_end(struct perf_output_handle *handle)
2760{ 2745{
2761 struct perf_counter *counter = handle->counter; 2746 struct perf_counter *counter = handle->counter;
2762 struct perf_mmap_data *data = handle->data; 2747 struct perf_mmap_data *data = handle->data;
@@ -2870,82 +2855,151 @@ static void perf_output_read(struct perf_output_handle *handle,
2870 perf_output_read_one(handle, counter); 2855 perf_output_read_one(handle, counter);
2871} 2856}
2872 2857
2873void perf_counter_output(struct perf_counter *counter, int nmi, 2858void perf_output_sample(struct perf_output_handle *handle,
2874 struct perf_sample_data *data) 2859 struct perf_event_header *header,
2860 struct perf_sample_data *data,
2861 struct perf_counter *counter)
2862{
2863 u64 sample_type = data->type;
2864
2865 perf_output_put(handle, *header);
2866
2867 if (sample_type & PERF_SAMPLE_IP)
2868 perf_output_put(handle, data->ip);
2869
2870 if (sample_type & PERF_SAMPLE_TID)
2871 perf_output_put(handle, data->tid_entry);
2872
2873 if (sample_type & PERF_SAMPLE_TIME)
2874 perf_output_put(handle, data->time);
2875
2876 if (sample_type & PERF_SAMPLE_ADDR)
2877 perf_output_put(handle, data->addr);
2878
2879 if (sample_type & PERF_SAMPLE_ID)
2880 perf_output_put(handle, data->id);
2881
2882 if (sample_type & PERF_SAMPLE_STREAM_ID)
2883 perf_output_put(handle, data->stream_id);
2884
2885 if (sample_type & PERF_SAMPLE_CPU)
2886 perf_output_put(handle, data->cpu_entry);
2887
2888 if (sample_type & PERF_SAMPLE_PERIOD)
2889 perf_output_put(handle, data->period);
2890
2891 if (sample_type & PERF_SAMPLE_READ)
2892 perf_output_read(handle, counter);
2893
2894 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2895 if (data->callchain) {
2896 int size = 1;
2897
2898 if (data->callchain)
2899 size += data->callchain->nr;
2900
2901 size *= sizeof(u64);
2902
2903 perf_output_copy(handle, data->callchain, size);
2904 } else {
2905 u64 nr = 0;
2906 perf_output_put(handle, nr);
2907 }
2908 }
2909
2910 if (sample_type & PERF_SAMPLE_RAW) {
2911 if (data->raw) {
2912 perf_output_put(handle, data->raw->size);
2913 perf_output_copy(handle, data->raw->data,
2914 data->raw->size);
2915 } else {
2916 struct {
2917 u32 size;
2918 u32 data;
2919 } raw = {
2920 .size = sizeof(u32),
2921 .data = 0,
2922 };
2923 perf_output_put(handle, raw);
2924 }
2925 }
2926}
2927
2928void perf_prepare_sample(struct perf_event_header *header,
2929 struct perf_sample_data *data,
2930 struct perf_counter *counter,
2931 struct pt_regs *regs)
2875{ 2932{
2876 int ret;
2877 u64 sample_type = counter->attr.sample_type; 2933 u64 sample_type = counter->attr.sample_type;
2878 struct perf_output_handle handle;
2879 struct perf_event_header header;
2880 u64 ip;
2881 struct {
2882 u32 pid, tid;
2883 } tid_entry;
2884 struct perf_callchain_entry *callchain = NULL;
2885 int callchain_size = 0;
2886 u64 time;
2887 struct {
2888 u32 cpu, reserved;
2889 } cpu_entry;
2890 2934
2891 header.type = PERF_EVENT_SAMPLE; 2935 data->type = sample_type;
2892 header.size = sizeof(header);
2893 2936
2894 header.misc = 0; 2937 header->type = PERF_EVENT_SAMPLE;
2895 header.misc |= perf_misc_flags(data->regs); 2938 header->size = sizeof(*header);
2939
2940 header->misc = 0;
2941 header->misc |= perf_misc_flags(regs);
2896 2942
2897 if (sample_type & PERF_SAMPLE_IP) { 2943 if (sample_type & PERF_SAMPLE_IP) {
2898 ip = perf_instruction_pointer(data->regs); 2944 data->ip = perf_instruction_pointer(regs);
2899 header.size += sizeof(ip); 2945
2946 header->size += sizeof(data->ip);
2900 } 2947 }
2901 2948
2902 if (sample_type & PERF_SAMPLE_TID) { 2949 if (sample_type & PERF_SAMPLE_TID) {
2903 /* namespace issues */ 2950 /* namespace issues */
2904 tid_entry.pid = perf_counter_pid(counter, current); 2951 data->tid_entry.pid = perf_counter_pid(counter, current);
2905 tid_entry.tid = perf_counter_tid(counter, current); 2952 data->tid_entry.tid = perf_counter_tid(counter, current);
2906 2953
2907 header.size += sizeof(tid_entry); 2954 header->size += sizeof(data->tid_entry);
2908 } 2955 }
2909 2956
2910 if (sample_type & PERF_SAMPLE_TIME) { 2957 if (sample_type & PERF_SAMPLE_TIME) {
2911 /* 2958 /*
2912 * Maybe do better on x86 and provide cpu_clock_nmi() 2959 * Maybe do better on x86 and provide cpu_clock_nmi()
2913 */ 2960 */
2914 time = sched_clock(); 2961 data->time = sched_clock();
2915 2962
2916 header.size += sizeof(u64); 2963 header->size += sizeof(data->time);
2917 } 2964 }
2918 2965
2919 if (sample_type & PERF_SAMPLE_ADDR) 2966 if (sample_type & PERF_SAMPLE_ADDR)
2920 header.size += sizeof(u64); 2967 header->size += sizeof(data->addr);
2921 2968
2922 if (sample_type & PERF_SAMPLE_ID) 2969 if (sample_type & PERF_SAMPLE_ID) {
2923 header.size += sizeof(u64); 2970 data->id = primary_counter_id(counter);
2924 2971
2925 if (sample_type & PERF_SAMPLE_STREAM_ID) 2972 header->size += sizeof(data->id);
2926 header.size += sizeof(u64); 2973 }
2974
2975 if (sample_type & PERF_SAMPLE_STREAM_ID) {
2976 data->stream_id = counter->id;
2977
2978 header->size += sizeof(data->stream_id);
2979 }
2927 2980
2928 if (sample_type & PERF_SAMPLE_CPU) { 2981 if (sample_type & PERF_SAMPLE_CPU) {
2929 header.size += sizeof(cpu_entry); 2982 data->cpu_entry.cpu = raw_smp_processor_id();
2983 data->cpu_entry.reserved = 0;
2930 2984
2931 cpu_entry.cpu = raw_smp_processor_id(); 2985 header->size += sizeof(data->cpu_entry);
2932 cpu_entry.reserved = 0;
2933 } 2986 }
2934 2987
2935 if (sample_type & PERF_SAMPLE_PERIOD) 2988 if (sample_type & PERF_SAMPLE_PERIOD)
2936 header.size += sizeof(u64); 2989 header->size += sizeof(data->period);
2937 2990
2938 if (sample_type & PERF_SAMPLE_READ) 2991 if (sample_type & PERF_SAMPLE_READ)
2939 header.size += perf_counter_read_size(counter); 2992 header->size += perf_counter_read_size(counter);
2940 2993
2941 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2994 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2942 callchain = perf_callchain(data->regs); 2995 int size = 1;
2943 2996
2944 if (callchain) { 2997 data->callchain = perf_callchain(regs);
2945 callchain_size = (1 + callchain->nr) * sizeof(u64); 2998
2946 header.size += callchain_size; 2999 if (data->callchain)
2947 } else 3000 size += data->callchain->nr;
2948 header.size += sizeof(u64); 3001
3002 header->size += size * sizeof(u64);
2949 } 3003 }
2950 3004
2951 if (sample_type & PERF_SAMPLE_RAW) { 3005 if (sample_type & PERF_SAMPLE_RAW) {
@@ -2957,69 +3011,23 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
2957 size += sizeof(u32); 3011 size += sizeof(u32);
2958 3012
2959 WARN_ON_ONCE(size & (sizeof(u64)-1)); 3013 WARN_ON_ONCE(size & (sizeof(u64)-1));
2960 header.size += size; 3014 header->size += size;
2961 }
2962
2963 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
2964 if (ret)
2965 return;
2966
2967 perf_output_put(&handle, header);
2968
2969 if (sample_type & PERF_SAMPLE_IP)
2970 perf_output_put(&handle, ip);
2971
2972 if (sample_type & PERF_SAMPLE_TID)
2973 perf_output_put(&handle, tid_entry);
2974
2975 if (sample_type & PERF_SAMPLE_TIME)
2976 perf_output_put(&handle, time);
2977
2978 if (sample_type & PERF_SAMPLE_ADDR)
2979 perf_output_put(&handle, data->addr);
2980
2981 if (sample_type & PERF_SAMPLE_ID) {
2982 u64 id = primary_counter_id(counter);
2983
2984 perf_output_put(&handle, id);
2985 } 3015 }
3016}
2986 3017
2987 if (sample_type & PERF_SAMPLE_STREAM_ID) 3018static void perf_counter_output(struct perf_counter *counter, int nmi,
2988 perf_output_put(&handle, counter->id); 3019 struct perf_sample_data *data,
2989 3020 struct pt_regs *regs)
2990 if (sample_type & PERF_SAMPLE_CPU) 3021{
2991 perf_output_put(&handle, cpu_entry); 3022 struct perf_output_handle handle;
2992 3023 struct perf_event_header header;
2993 if (sample_type & PERF_SAMPLE_PERIOD)
2994 perf_output_put(&handle, data->period);
2995 3024
2996 if (sample_type & PERF_SAMPLE_READ) 3025 perf_prepare_sample(&header, data, counter, regs);
2997 perf_output_read(&handle, counter);
2998 3026
2999 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 3027 if (perf_output_begin(&handle, counter, header.size, nmi, 1))
3000 if (callchain) 3028 return;
3001 perf_output_copy(&handle, callchain, callchain_size);
3002 else {
3003 u64 nr = 0;
3004 perf_output_put(&handle, nr);
3005 }
3006 }
3007 3029
3008 if (sample_type & PERF_SAMPLE_RAW) { 3030 perf_output_sample(&handle, &header, data, counter);
3009 if (data->raw) {
3010 perf_output_put(&handle, data->raw->size);
3011 perf_output_copy(&handle, data->raw->data, data->raw->size);
3012 } else {
3013 struct {
3014 u32 size;
3015 u32 data;
3016 } raw = {
3017 .size = sizeof(u32),
3018 .data = 0,
3019 };
3020 perf_output_put(&handle, raw);
3021 }
3022 }
3023 3031
3024 perf_output_end(&handle); 3032 perf_output_end(&handle);
3025} 3033}
@@ -3501,7 +3509,8 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
3501 */ 3509 */
3502 3510
3503static int __perf_counter_overflow(struct perf_counter *counter, int nmi, 3511static int __perf_counter_overflow(struct perf_counter *counter, int nmi,
3504 int throttle, struct perf_sample_data *data) 3512 int throttle, struct perf_sample_data *data,
3513 struct pt_regs *regs)
3505{ 3514{
3506 int events = atomic_read(&counter->event_limit); 3515 int events = atomic_read(&counter->event_limit);
3507 struct hw_perf_counter *hwc = &counter->hw; 3516 struct hw_perf_counter *hwc = &counter->hw;
@@ -3557,14 +3566,15 @@ static int __perf_counter_overflow(struct perf_counter *counter, int nmi,
3557 perf_counter_disable(counter); 3566 perf_counter_disable(counter);
3558 } 3567 }
3559 3568
3560 perf_counter_output(counter, nmi, data); 3569 perf_counter_output(counter, nmi, data, regs);
3561 return ret; 3570 return ret;
3562} 3571}
3563 3572
3564int perf_counter_overflow(struct perf_counter *counter, int nmi, 3573int perf_counter_overflow(struct perf_counter *counter, int nmi,
3565 struct perf_sample_data *data) 3574 struct perf_sample_data *data,
3575 struct pt_regs *regs)
3566{ 3576{
3567 return __perf_counter_overflow(counter, nmi, 1, data); 3577 return __perf_counter_overflow(counter, nmi, 1, data, regs);
3568} 3578}
3569 3579
3570/* 3580/*
@@ -3602,7 +3612,8 @@ again:
3602} 3612}
3603 3613
3604static void perf_swcounter_overflow(struct perf_counter *counter, 3614static void perf_swcounter_overflow(struct perf_counter *counter,
3605 int nmi, struct perf_sample_data *data) 3615 int nmi, struct perf_sample_data *data,
3616 struct pt_regs *regs)
3606{ 3617{
3607 struct hw_perf_counter *hwc = &counter->hw; 3618 struct hw_perf_counter *hwc = &counter->hw;
3608 int throttle = 0; 3619 int throttle = 0;
@@ -3615,7 +3626,8 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
3615 return; 3626 return;
3616 3627
3617 for (; overflow; overflow--) { 3628 for (; overflow; overflow--) {
3618 if (__perf_counter_overflow(counter, nmi, throttle, data)) { 3629 if (__perf_counter_overflow(counter, nmi, throttle,
3630 data, regs)) {
3619 /* 3631 /*
3620 * We inhibit the overflow from happening when 3632 * We inhibit the overflow from happening when
3621 * hwc->interrupts == MAX_INTERRUPTS. 3633 * hwc->interrupts == MAX_INTERRUPTS.
@@ -3634,7 +3646,8 @@ static void perf_swcounter_unthrottle(struct perf_counter *counter)
3634} 3646}
3635 3647
3636static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 3648static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3637 int nmi, struct perf_sample_data *data) 3649 int nmi, struct perf_sample_data *data,
3650 struct pt_regs *regs)
3638{ 3651{
3639 struct hw_perf_counter *hwc = &counter->hw; 3652 struct hw_perf_counter *hwc = &counter->hw;
3640 3653
@@ -3643,11 +3656,11 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3643 if (!hwc->sample_period) 3656 if (!hwc->sample_period)
3644 return; 3657 return;
3645 3658
3646 if (!data->regs) 3659 if (!regs)
3647 return; 3660 return;
3648 3661
3649 if (!atomic64_add_negative(nr, &hwc->period_left)) 3662 if (!atomic64_add_negative(nr, &hwc->period_left))
3650 perf_swcounter_overflow(counter, nmi, data); 3663 perf_swcounter_overflow(counter, nmi, data, regs);
3651} 3664}
3652 3665
3653static int perf_swcounter_is_counting(struct perf_counter *counter) 3666static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3706,7 +3719,8 @@ static int perf_swcounter_match(struct perf_counter *counter,
3706static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, 3719static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3707 enum perf_type_id type, 3720 enum perf_type_id type,
3708 u32 event, u64 nr, int nmi, 3721 u32 event, u64 nr, int nmi,
3709 struct perf_sample_data *data) 3722 struct perf_sample_data *data,
3723 struct pt_regs *regs)
3710{ 3724{
3711 struct perf_counter *counter; 3725 struct perf_counter *counter;
3712 3726
@@ -3715,8 +3729,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3715 3729
3716 rcu_read_lock(); 3730 rcu_read_lock();
3717 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 3731 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
3718 if (perf_swcounter_match(counter, type, event, data->regs)) 3732 if (perf_swcounter_match(counter, type, event, regs))
3719 perf_swcounter_add(counter, nr, nmi, data); 3733 perf_swcounter_add(counter, nr, nmi, data, regs);
3720 } 3734 }
3721 rcu_read_unlock(); 3735 rcu_read_unlock();
3722} 3736}
@@ -3737,7 +3751,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
3737 3751
3738static void do_perf_swcounter_event(enum perf_type_id type, u32 event, 3752static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3739 u64 nr, int nmi, 3753 u64 nr, int nmi,
3740 struct perf_sample_data *data) 3754 struct perf_sample_data *data,
3755 struct pt_regs *regs)
3741{ 3756{
3742 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3757 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3743 int *recursion = perf_swcounter_recursion_context(cpuctx); 3758 int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -3750,7 +3765,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3750 barrier(); 3765 barrier();
3751 3766
3752 perf_swcounter_ctx_event(&cpuctx->ctx, type, event, 3767 perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
3753 nr, nmi, data); 3768 nr, nmi, data, regs);
3754 rcu_read_lock(); 3769 rcu_read_lock();
3755 /* 3770 /*
3756 * doesn't really matter which of the child contexts the 3771 * doesn't really matter which of the child contexts the
@@ -3758,7 +3773,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3758 */ 3773 */
3759 ctx = rcu_dereference(current->perf_counter_ctxp); 3774 ctx = rcu_dereference(current->perf_counter_ctxp);
3760 if (ctx) 3775 if (ctx)
3761 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data); 3776 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs);
3762 rcu_read_unlock(); 3777 rcu_read_unlock();
3763 3778
3764 barrier(); 3779 barrier();
@@ -3772,11 +3787,11 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
3772 struct pt_regs *regs, u64 addr) 3787 struct pt_regs *regs, u64 addr)
3773{ 3788{
3774 struct perf_sample_data data = { 3789 struct perf_sample_data data = {
3775 .regs = regs,
3776 .addr = addr, 3790 .addr = addr,
3777 }; 3791 };
3778 3792
3779 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data); 3793 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi,
3794 &data, regs);
3780} 3795}
3781 3796
3782static void perf_swcounter_read(struct perf_counter *counter) 3797static void perf_swcounter_read(struct perf_counter *counter)
@@ -3813,6 +3828,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3813{ 3828{
3814 enum hrtimer_restart ret = HRTIMER_RESTART; 3829 enum hrtimer_restart ret = HRTIMER_RESTART;
3815 struct perf_sample_data data; 3830 struct perf_sample_data data;
3831 struct pt_regs *regs;
3816 struct perf_counter *counter; 3832 struct perf_counter *counter;
3817 u64 period; 3833 u64 period;
3818 3834
@@ -3820,17 +3836,17 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3820 counter->pmu->read(counter); 3836 counter->pmu->read(counter);
3821 3837
3822 data.addr = 0; 3838 data.addr = 0;
3823 data.regs = get_irq_regs(); 3839 regs = get_irq_regs();
3824 /* 3840 /*
3825 * In case we exclude kernel IPs or are somehow not in interrupt 3841 * In case we exclude kernel IPs or are somehow not in interrupt
3826 * context, provide the next best thing, the user IP. 3842 * context, provide the next best thing, the user IP.
3827 */ 3843 */
3828 if ((counter->attr.exclude_kernel || !data.regs) && 3844 if ((counter->attr.exclude_kernel || !regs) &&
3829 !counter->attr.exclude_user) 3845 !counter->attr.exclude_user)
3830 data.regs = task_pt_regs(current); 3846 regs = task_pt_regs(current);
3831 3847
3832 if (data.regs) { 3848 if (regs) {
3833 if (perf_counter_overflow(counter, 0, &data)) 3849 if (perf_counter_overflow(counter, 0, &data, regs))
3834 ret = HRTIMER_NORESTART; 3850 ret = HRTIMER_NORESTART;
3835 } 3851 }
3836 3852
@@ -3966,15 +3982,17 @@ void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
3966 }; 3982 };
3967 3983
3968 struct perf_sample_data data = { 3984 struct perf_sample_data data = {
3969 .regs = get_irq_regs(),
3970 .addr = addr, 3985 .addr = addr,
3971 .raw = &raw, 3986 .raw = &raw,
3972 }; 3987 };
3973 3988
3974 if (!data.regs) 3989 struct pt_regs *regs = get_irq_regs();
3975 data.regs = task_pt_regs(current); 3990
3991 if (!regs)
3992 regs = task_pt_regs(current);
3976 3993
3977 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data); 3994 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
3995 &data, regs);
3978} 3996}
3979EXPORT_SYMBOL_GPL(perf_tpcounter_event); 3997EXPORT_SYMBOL_GPL(perf_tpcounter_event);
3980 3998