author:    Peter Zijlstra <a.p.zijlstra@chello.nl>  2009-08-13 05:47:53 -0400
committer: Ingo Molnar <mingo@elte.hu>              2009-08-13 06:58:04 -0400
commit:    3dab77fb1bf89664bb1c9544607159dcab6f7d57
tree:      9b0a47c53784cb066cecfd3091ac6042ce21a29e /kernel
parent:    bcfc2602e8541ac13b1def38e2591dca072cff7a
perf: Rework/fix the whole read vs group stuff
Replace PERF_SAMPLE_GROUP with PERF_SAMPLE_READ and introduce
PERF_FORMAT_GROUP to deal with group reads in a more generic
way.
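
For reference, the layouts this yields (a pseudo-C sketch reconstructed from
perf_counter_read_size()/perf_counter_read_one()/perf_counter_read_group() in
the diff below; these structs are illustrative only and are not declared in
the kernel headers -- optional fields appear only when the corresponding
read_format bit is set):

	/* !PERF_FORMAT_GROUP: a single value plus optional fields */
	struct read_format_one {
		u64 value;
		u64 time_enabled;	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
		u64 time_running;	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
		u64 id;			/* PERF_FORMAT_ID */
	};

	/* PERF_FORMAT_GROUP: leader entry first, then one per sibling */
	struct read_format_group {
		u64 nr;			/* 1 + leader->nr_siblings */
		u64 time_enabled;	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
		u64 time_running;	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
		struct {
			u64 value;
			u64 id;		/* PERF_FORMAT_ID */
		} cnt[nr];		/* pseudo-C: nr entries follow */
	};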
This allows you to get group reads out of read() as well.
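
To make the read() side concrete, a minimal userspace sketch (not part of this
patch): it counts cycles and instructions as one group and fetches both with a
single read(). It assumes this tree's <linux/perf_counter.h>, and 298 is the
x86_64 syscall number for sys_perf_counter_open at the time -- an assumption
to adjust per architecture:

	/*
	 * Hypothetical example, not from the patch: read a whole counter
	 * group in one read() using PERF_FORMAT_GROUP | PERF_FORMAT_ID.
	 */
	#include <linux/perf_counter.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int sys_perf_counter_open(struct perf_counter_attr *attr,
					 pid_t pid, int cpu, int group_fd,
					 unsigned long flags)
	{
		/* 298 = __NR_perf_counter_open on x86_64 (assumption) */
		return syscall(298, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_counter_attr attr;
		uint64_t buf[5];	/* nr + 2 x { value, id } */
		uint64_t i, nr;
		int leader, sibling;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;

		/* group_fd == -1 starts a new group with this leader */
		leader = sys_perf_counter_open(&attr, 0, -1, -1, 0);

		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		sibling = sys_perf_counter_open(&attr, 0, -1, leader, 0);

		/* ... run the workload being measured ... */

		/* one read() on the leader returns the whole group */
		if (read(leader, buf, sizeof(buf)) < 0)
			return 1;

		nr = buf[0];
		for (i = 0; i < nr; i++)	/* { value, id } pairs */
			printf("counter id %llu = %llu\n",
			       (unsigned long long)buf[2 + 2 * i],
			       (unsigned long long)buf[1 + 2 * i]);

		close(leader);
		close(sibling);
		return 0;
	}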
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey J Ashford <cjashfor@us.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
LKML-Reference: <20090813103655.117411814@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')

 kernel/perf_counter.c | 274 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 202 insertions(+), 72 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3dd4339589a0..b8c6b97a20a3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1692,7 +1692,32 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (counter->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += counter->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+
+	return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
 {
 	struct perf_counter *child;
 	u64 total = 0;
@@ -1704,14 +1729,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
 	return total;
 }
 
+static int perf_counter_read_entry(struct perf_counter *counter,
+				   u64 read_format, char __user *buf)
+{
+	int n = 0, count = 0;
+	u64 values[2];
+
+	values[n++] = perf_counter_read_value(counter);
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_counter_id(counter);
+
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
+}
+
+static int perf_counter_read_group(struct perf_counter *counter,
+				   u64 read_format, char __user *buf)
+{
+	struct perf_counter *leader = counter->group_leader, *sub;
+	int n = 0, size = 0, err = -EFAULT;
+	u64 values[3];
+
+	values[n++] = 1 + leader->nr_siblings;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = leader->total_time_enabled +
+			atomic64_read(&leader->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = leader->total_time_running +
+			atomic64_read(&leader->child_total_time_running);
+	}
+
+	size = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, size))
+		return -EFAULT;
+
+	err = perf_counter_read_entry(leader, read_format, buf + size);
+	if (err < 0)
+		return err;
+
+	size += err;
+
+	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		err = perf_counter_read_entry(counter, read_format,
+				buf + size);
+		if (err < 0)
+			return err;
+
+		size += err;
+	}
+
+	return size;
+}
+
+static int perf_counter_read_one(struct perf_counter *counter,
+				 u64 read_format, char __user *buf)
+{
+	u64 values[4];
+	int n = 0;
+
+	values[n++] = perf_counter_read_value(counter);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
+	}
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_counter_id(counter);
+
+	if (copy_to_user(buf, values, n * sizeof(u64)))
+		return -EFAULT;
+
+	return n * sizeof(u64);
+}
+
 /*
  * Read the performance counter - simple non blocking version for now
  */
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-	u64 values[4];
-	int n;
+	u64 read_format = counter->attr.read_format;
+	int ret;
 
 	/*
 	 * Return end-of-file for a read on a counter that is in
@@ -1721,28 +1828,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->state == PERF_COUNTER_STATE_ERROR)
 		return 0;
 
+	if (count < perf_counter_read_size(counter))
+		return -ENOSPC;
+
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
-	values[0] = perf_counter_read_tree(counter);
-	n = 1;
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		values[n++] = counter->total_time_enabled +
-			atomic64_read(&counter->child_total_time_enabled);
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		values[n++] = counter->total_time_running +
-			atomic64_read(&counter->child_total_time_running);
-	if (counter->attr.read_format & PERF_FORMAT_ID)
-		values[n++] = primary_counter_id(counter);
+	if (read_format & PERF_FORMAT_GROUP)
+		ret = perf_counter_read_group(counter, read_format, buf);
+	else
+		ret = perf_counter_read_one(counter, read_format, buf);
 	mutex_unlock(&counter->child_mutex);
 
-	if (count < n * sizeof(u64))
-		return -EINVAL;
-	count = n * sizeof(u64);
-
-	if (copy_to_user(buf, values, count))
-		return -EFAULT;
-
-	return count;
+	return ret;
 }
 
 static ssize_t
@@ -2631,6 +2728,79 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
 	return task_pid_nr_ns(p, counter->ns);
 }
 
+static void perf_output_read_one(struct perf_output_handle *handle,
+				 struct perf_counter *counter)
+{
+	u64 read_format = counter->attr.read_format;
+	u64 values[4];
+	int n = 0;
+
+	values[n++] = atomic64_read(&counter->count);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
+	}
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_counter_id(counter);
+
+	perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+				   struct perf_counter *counter)
+{
+	struct perf_counter *leader = counter->group_leader, *sub;
+	u64 read_format = counter->attr.read_format;
+	u64 values[5];
+	int n = 0;
+
+	values[n++] = 1 + leader->nr_siblings;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = leader->total_time_enabled;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = leader->total_time_running;
+
+	if (leader != counter)
+		leader->pmu->read(leader);
+
+	values[n++] = atomic64_read(&leader->count);
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_counter_id(leader);
+
+	perf_output_copy(handle, values, n * sizeof(u64));
+
+	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		n = 0;
+
+		if (sub != counter)
+			sub->pmu->read(sub);
+
+		values[n++] = atomic64_read(&sub->count);
+		if (read_format & PERF_FORMAT_ID)
+			values[n++] = primary_counter_id(sub);
+
+		perf_output_copy(handle, values, n * sizeof(u64));
+	}
+}
+
+static void perf_output_read(struct perf_output_handle *handle,
+			     struct perf_counter *counter)
+{
+	if (counter->attr.read_format & PERF_FORMAT_GROUP)
+		perf_output_read_group(handle, counter);
+	else
+		perf_output_read_one(handle, counter);
+}
+
 void perf_counter_output(struct perf_counter *counter, int nmi,
 			 struct perf_sample_data *data)
 {
@@ -2642,10 +2812,6 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
 	struct {
 		u32 pid, tid;
 	} tid_entry;
-	struct {
-		u64 id;
-		u64 counter;
-	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
 	u64 time;
@@ -2700,10 +2866,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		header.size += sizeof(u64);
 
-	if (sample_type & PERF_SAMPLE_GROUP) {
-		header.size += sizeof(u64) +
-			counter->nr_siblings * sizeof(group_entry);
-	}
+	if (sample_type & PERF_SAMPLE_READ)
+		header.size += perf_counter_read_size(counter);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		callchain = perf_callchain(data->regs);
@@ -2760,26 +2924,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		perf_output_put(&handle, data->period);
 
-	/*
-	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
-	 */
-	if (sample_type & PERF_SAMPLE_GROUP) {
-		struct perf_counter *leader, *sub;
-		u64 nr = counter->nr_siblings;
-
-		perf_output_put(&handle, nr);
-
-		leader = counter->group_leader;
-		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-			if (sub != counter)
-				sub->pmu->read(sub);
-
-			group_entry.id = primary_counter_id(sub);
-			group_entry.counter = atomic64_read(&sub->count);
-
-			perf_output_put(&handle, group_entry);
-		}
-	}
+	if (sample_type & PERF_SAMPLE_READ)
+		perf_output_read(&handle, counter);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		if (callchain)
@@ -2818,8 +2964,6 @@ struct perf_read_event {
 
 	u32				pid;
 	u32				tid;
-	u64				value;
-	u64				format[3];
 };
 
 static void
@@ -2831,34 +2975,20 @@ perf_counter_read_event(struct perf_counter *counter,
 		.header = {
 			.type = PERF_EVENT_READ,
 			.misc = 0,
-			.size = sizeof(event) - sizeof(event.format),
+			.size = sizeof(event) + perf_counter_read_size(counter),
 		},
 		.pid = perf_counter_pid(counter, task),
 		.tid = perf_counter_tid(counter, task),
-		.value = atomic64_read(&counter->count),
 	};
-	int ret, i = 0;
-
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		event.header.size += sizeof(u64);
-		event.format[i++] = counter->total_time_enabled;
-	}
-
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		event.header.size += sizeof(u64);
-		event.format[i++] = counter->total_time_running;
-	}
-
-	if (counter->attr.read_format & PERF_FORMAT_ID) {
-		event.header.size += sizeof(u64);
-		event.format[i++] = primary_counter_id(counter);
-	}
+	int ret;
 
 	ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
 	if (ret)
 		return;
 
-	perf_output_copy(&handle, &event, event.header.size);
+	perf_output_put(&handle, event);
+	perf_output_read(&handle, counter);
+
 	perf_output_end(&handle);
 }
 
@@ -3921,9 +4051,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	atomic64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
-	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+	 * we currently do not support PERF_FORMAT_GROUP on inherited counters
 	 */
-	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
 		goto done;
 
 	switch (attr->type) {