author     Frederic Weisbecker <fweisbec@gmail.com>    2010-07-01 10:20:36 -0400
committer  Frederic Weisbecker <fweisbec@gmail.com>    2010-08-18 19:32:31 -0400
commit     927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b (patch)
tree       d98bde726caf6b27d465852b5683cf08485df007 /kernel/perf_event.c
parent     f72c1a931e311bb7780fee19e41a89ac42cab50e (diff)
perf: Fix race in callchains
Now that software events no longer have interrupts disabled in
the event path, callchains can nest on any context. So separating
NMI and other contexts into two buffers has become racy.

Fix this by providing one buffer per nesting level. Given the size
of the callchain entries (2040 bytes * 4), we now need to allocate
them dynamically.

v2: Fix the put_callchain_entry call after recursion.
    Fix the type of the recursion variable: it must be an array.

v3: Use a manual per cpu allocation (temporary solution until NMIs
    can safely access vmalloc'ed memory).
    Do a better separation between callchain reference tracking and
    allocation. Make the "put" path lockless for non-release cases.

v4: Protect the callchain buffers with RCU.

v5: Make the per cpu buffer allocations node affine.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David Miller <davem@davemloft.net>
Cc: Borislav Petkov <bp@amd64.org>
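
Editor's note: to make the buffering scheme above concrete, here is a minimal standalone userspace sketch (not part of the patch, and not kernel code). It mirrors the patch's four nesting levels (task, softirq, hardirq, NMI) with one buffer and one recursion flag per level; the helper names get_entry()/put_entry() and the struct layout are purely illustrative simplifications of get_callchain_entry()/put_callchain_entry().

    #include <stdio.h>

    #define NR_LEVELS 4   /* task, softirq, hardirq, NMI: one buffer each */

    struct callchain_entry {
            unsigned int  nr;
            unsigned long ips[64];
    };

    /* One buffer and one recursion flag per nesting level (per CPU in the kernel). */
    static struct callchain_entry buffers[NR_LEVELS];
    static int recursion[NR_LEVELS];

    /* Model of get_recursion_context(): claim this level's buffer, or bail out. */
    static int get_entry(int level, struct callchain_entry **entry)
    {
            if (recursion[level])
                    return -1;      /* already sampling at this level: drop the sample */
            recursion[level]++;
            buffers[level].nr = 0;
            *entry = &buffers[level];
            return level;
    }

    static void put_entry(int level)
    {
            recursion[level]--;
    }

    int main(void)
    {
            struct callchain_entry *outer = NULL, *inner = NULL;

            /* A task-level sample is in flight... */
            int o = get_entry(0, &outer);
            /* ...when an IRQ-level sample nests on top of it: it gets its own buffer. */
            int i = get_entry(2, &inner);

            printf("outer level %d uses %p, inner level %d uses %p\n",
                   o, (void *)outer, i, (void *)inner);

            put_entry(i);
            put_entry(o);
            return 0;
    }

Because each nesting level owns a distinct buffer, a nested sample can never overwrite the entry of the sample it interrupted, which is exactly the race the patch closes.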
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  298
1 file changed, 229 insertions(+), 69 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 615d024894cf..75ab8a2df6b2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1764,6 +1764,216 @@ static u64 perf_event_read(struct perf_event *event)
 }
 
 /*
+ * Callchain support
+ */
+
+struct callchain_cpus_entries {
+	struct rcu_head			rcu_head;
+	struct perf_callchain_entry	*cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[4]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+				  struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+				struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+	struct callchain_cpus_entries *entries;
+	int cpu;
+
+	entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+	for_each_possible_cpu(cpu)
+		kfree(entries->cpu_entries[cpu]);
+
+	kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+	struct callchain_cpus_entries *entries;
+
+	entries = callchain_cpus_entries;
+	rcu_assign_pointer(callchain_cpus_entries, NULL);
+	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+	int cpu;
+	int size;
+	struct callchain_cpus_entries *entries;
+
+	/*
+	 * We can't use the percpu allocation API for data that can be
+	 * accessed from NMI. Use a temporary manual per cpu allocation
+	 * until that gets sorted out.
+	 */
+	size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
+		num_possible_cpus();
+
+	entries = kzalloc(size, GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	size = sizeof(struct perf_callchain_entry) * 4;
+
+	for_each_possible_cpu(cpu) {
+		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+							 cpu_to_node(cpu));
+		if (!entries->cpu_entries[cpu])
+			goto fail;
+	}
+
+	rcu_assign_pointer(callchain_cpus_entries, entries);
+
+	return 0;
+
+fail:
+	for_each_possible_cpu(cpu)
+		kfree(entries->cpu_entries[cpu]);
+	kfree(entries);
+
+	return -ENOMEM;
+}
+
+static int get_callchain_buffers(void)
+{
+	int err = 0;
+	int count;
+
+	mutex_lock(&callchain_mutex);
+
+	count = atomic_inc_return(&nr_callchain_events);
+	if (WARN_ON_ONCE(count < 1)) {
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (count > 1) {
+		/* If the allocation failed, give up */
+		if (!callchain_cpus_entries)
+			err = -ENOMEM;
+		goto exit;
+	}
+
+	err = alloc_callchain_buffers();
+	if (err)
+		release_callchain_buffers();
+exit:
+	mutex_unlock(&callchain_mutex);
+
+	return err;
+}
+
+static void put_callchain_buffers(void)
+{
+	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+		release_callchain_buffers();
+		mutex_unlock(&callchain_mutex);
+	}
+}
+
+static int get_recursion_context(int *recursion)
+{
+	int rctx;
+
+	if (in_nmi())
+		rctx = 3;
+	else if (in_irq())
+		rctx = 2;
+	else if (in_softirq())
+		rctx = 1;
+	else
+		rctx = 0;
+
+	if (recursion[rctx])
+		return -1;
+
+	recursion[rctx]++;
+	barrier();
+
+	return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+	barrier();
+	recursion[rctx]--;
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+	int cpu;
+	struct callchain_cpus_entries *entries;
+
+	*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+	if (*rctx == -1)
+		return NULL;
+
+	entries = rcu_dereference(callchain_cpus_entries);
+	if (!entries)
+		return NULL;
+
+	cpu = smp_processor_id();
+
+	return &entries->cpu_entries[cpu][*rctx];
+}
+
+static void
+put_callchain_entry(int rctx)
+{
+	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	int rctx;
+	struct perf_callchain_entry *entry;
+
+
+	entry = get_callchain_entry(&rctx);
+	if (rctx == -1)
+		return NULL;
+
+	if (!entry)
+		goto exit_put;
+
+	entry->nr = 0;
+
+	if (!user_mode(regs)) {
+		perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+		perf_callchain_kernel(entry, regs);
+		if (current->mm)
+			regs = task_pt_regs(current);
+		else
+			regs = NULL;
+	}
+
+	if (regs) {
+		perf_callchain_store(entry, PERF_CONTEXT_USER);
+		perf_callchain_user(entry, regs);
+	}
+
+exit_put:
+	put_callchain_entry(rctx);
+
+	return entry;
+}
+
+/*
  * Initialize the perf_event context in a task_struct:
  */
 static void
@@ -1895,6 +2105,8 @@ static void free_event(struct perf_event *event)
 			atomic_dec(&nr_comm_events);
 		if (event->attr.task)
 			atomic_dec(&nr_task_events);
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
 	}
 
 	if (event->buffer) {
@@ -2937,55 +3149,6 @@ void perf_event_do_pending(void)
 	__perf_pending_run();
 }
 
-DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
-
-/*
- * Callchain support -- arch specific
- */
-
-__weak struct perf_callchain_entry *perf_callchain_buffer(void)
-{
-	return &__get_cpu_var(perf_callchain_entry);
-}
-
-__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
-				  struct pt_regs *regs)
-{
-}
-
-__weak void perf_callchain_user(struct perf_callchain_entry *entry,
-				struct pt_regs *regs)
-{
-}
-
-static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry;
-
-	entry = perf_callchain_buffer();
-	if (!entry)
-		return NULL;
-
-	entry->nr = 0;
-
-	if (!user_mode(regs)) {
-		perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
-		perf_callchain_kernel(entry, regs);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-
-	if (regs) {
-		perf_callchain_store(entry, PERF_CONTEXT_USER);
-		perf_callchain_user(entry, regs);
-	}
-
-	return entry;
-}
-
-
 /*
  * We assume there is only KVM supporting the callbacks.
  * Later on, we might change it to a list if there is
@@ -3480,14 +3643,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 
+	/* protect the callchain buffers */
+	rcu_read_lock();
+
 	perf_prepare_sample(&header, data, event, regs);
 
 	if (perf_output_begin(&handle, event, header.size, nmi, 1))
-		return;
+		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
 
 	perf_output_end(&handle);
+
+exit:
+	rcu_read_unlock();
 }
 
 /*
@@ -4243,32 +4412,16 @@ end:
 int perf_swevent_get_recursion_context(void)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	int rctx;
 
-	if (in_nmi())
-		rctx = 3;
-	else if (in_irq())
-		rctx = 2;
-	else if (in_softirq())
-		rctx = 1;
-	else
-		rctx = 0;
-
-	if (cpuctx->recursion[rctx])
-		return -1;
-
-	cpuctx->recursion[rctx]++;
-	barrier();
-
-	return rctx;
+	return get_recursion_context(cpuctx->recursion);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
 void inline perf_swevent_put_recursion_context(int rctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	barrier();
-	cpuctx->recursion[rctx]--;
+
+	put_recursion_context(cpuctx->recursion, rctx);
 }
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4968,6 +5121,13 @@ done:
 			atomic_inc(&nr_comm_events);
 		if (event->attr.task)
 			atomic_inc(&nr_task_events);
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+			err = get_callchain_buffers();
+			if (err) {
+				free_event(event);
+				return ERR_PTR(err);
+			}
+		}
 	}
 
 	return event;