author		Frederic Weisbecker <fweisbec@gmail.com>	2010-07-01 10:20:36 -0400
committer	Frederic Weisbecker <fweisbec@gmail.com>	2010-08-18 19:32:31 -0400
commit		927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b
tree		d98bde726caf6b27d465852b5683cf08485df007	/kernel/perf_event.c
parent		f72c1a931e311bb7780fee19e41a89ac42cab50e
perf: Fix race in callchains
Now that software events no longer run with interrupts disabled in the
event path, callchains can nest on any context. So separating NMI and
other contexts in two buffers has become racy.

Fix this by providing one buffer per nesting level. Given the size of
the callchain entries (2040 bytes * 4), we now need to allocate them
dynamically.

v2: Fixed put_callchain_entry call after recursion.
    Fix the type of the recursion, it must be an array.

v3: Use a manual per cpu allocation (temporary solution until NMIs
    can safely access vmalloc'ed memory).
    Do a better separation between callchain reference tracking and
    allocation. Make the "put" path lockless for non-release cases.

v4: Protect the callchain buffers with rcu.

v5: Do the cpu buffer allocations node affine.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David Miller <davem@davemloft.net>
Cc: Borislav Petkov <bp@amd64.org>
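The scheme is easiest to see in isolation: each context level (task, softirq, hardirq, NMI) owns one buffer slot and one recursion counter per CPU, so a capture that nests on the same CPU either lands in a different slot or is refused. The stand-alone C sketch below mimics that with plain ints in userspace; the names demo_recursion, demo_buffers, get_ctx and put_ctx are illustrative only and are not part of the patch.

#include <stdio.h>

/* One slot per nesting level, as in the patch: 0=task, 1=softirq, 2=hardirq, 3=NMI. */
#define NR_CONTEXTS 4

static int  demo_recursion[NR_CONTEXTS];     /* per-CPU array in the real code        */
static char demo_buffers[NR_CONTEXTS][64];   /* stands in for perf_callchain_entry[4] */

/* Grab the slot for a context, like get_recursion_context(): refuse nested use. */
static int get_ctx(int level)
{
	if (demo_recursion[level])
		return -1;               /* already capturing in this context */
	demo_recursion[level]++;
	return level;
}

static void put_ctx(int level)
{
	demo_recursion[level]--;
}

int main(void)
{
	int task  = get_ctx(0);          /* task-level capture gets slot 0          */
	int nmi   = get_ctx(3);          /* an NMI nesting on top gets its own slot */
	int again = get_ctx(0);          /* a second task-level capture is refused  */

	snprintf(demo_buffers[task], sizeof(demo_buffers[task]), "task callchain");
	snprintf(demo_buffers[nmi],  sizeof(demo_buffers[nmi]),  "nmi callchain");

	printf("task slot=%d, nmi slot=%d, nested task attempt=%d\n", task, nmi, again);

	put_ctx(nmi);
	put_ctx(task);
	return 0;
}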
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--	kernel/perf_event.c	298
1 file changed, 229 insertions(+), 69 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 615d024894cf..75ab8a2df6b2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1764,6 +1764,216 @@ static u64 perf_event_read(struct perf_event *event)
 }
 
 /*
+ * Callchain support
+ */
+
+struct callchain_cpus_entries {
+	struct rcu_head			rcu_head;
+	struct perf_callchain_entry	*cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[4]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+				  struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+				struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+	struct callchain_cpus_entries *entries;
+	int cpu;
+
+	entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+	for_each_possible_cpu(cpu)
+		kfree(entries->cpu_entries[cpu]);
+
+	kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+	struct callchain_cpus_entries *entries;
+
+	entries = callchain_cpus_entries;
+	rcu_assign_pointer(callchain_cpus_entries, NULL);
+	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+	int cpu;
+	int size;
+	struct callchain_cpus_entries *entries;
+
+	/*
+	 * We can't use the percpu allocation API for data that can be
+	 * accessed from NMI. Use a temporary manual per cpu allocation
+	 * until that gets sorted out.
+	 */
+	size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
+		num_possible_cpus();
+
+	entries = kzalloc(size, GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	size = sizeof(struct perf_callchain_entry) * 4;
+
+	for_each_possible_cpu(cpu) {
+		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+							 cpu_to_node(cpu));
+		if (!entries->cpu_entries[cpu])
+			goto fail;
+	}
+
+	rcu_assign_pointer(callchain_cpus_entries, entries);
+
+	return 0;
+
+fail:
+	for_each_possible_cpu(cpu)
+		kfree(entries->cpu_entries[cpu]);
+	kfree(entries);
+
+	return -ENOMEM;
+}
+
+static int get_callchain_buffers(void)
+{
+	int err = 0;
+	int count;
+
+	mutex_lock(&callchain_mutex);
+
+	count = atomic_inc_return(&nr_callchain_events);
+	if (WARN_ON_ONCE(count < 1)) {
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (count > 1) {
+		/* If the allocation failed, give up */
+		if (!callchain_cpus_entries)
+			err = -ENOMEM;
+		goto exit;
+	}
+
+	err = alloc_callchain_buffers();
+	if (err)
+		release_callchain_buffers();
+exit:
+	mutex_unlock(&callchain_mutex);
+
+	return err;
+}
+
+static void put_callchain_buffers(void)
+{
+	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+		release_callchain_buffers();
+		mutex_unlock(&callchain_mutex);
+	}
+}
+
+static int get_recursion_context(int *recursion)
+{
+	int rctx;
+
+	if (in_nmi())
+		rctx = 3;
+	else if (in_irq())
+		rctx = 2;
+	else if (in_softirq())
+		rctx = 1;
+	else
+		rctx = 0;
+
+	if (recursion[rctx])
+		return -1;
+
+	recursion[rctx]++;
+	barrier();
+
+	return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+	barrier();
+	recursion[rctx]--;
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+	int cpu;
+	struct callchain_cpus_entries *entries;
+
+	*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+	if (*rctx == -1)
+		return NULL;
+
+	entries = rcu_dereference(callchain_cpus_entries);
+	if (!entries)
+		return NULL;
+
+	cpu = smp_processor_id();
+
+	return &entries->cpu_entries[cpu][*rctx];
+}
+
+static void
+put_callchain_entry(int rctx)
+{
+	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	int rctx;
+	struct perf_callchain_entry *entry;
+
+
+	entry = get_callchain_entry(&rctx);
+	if (rctx == -1)
+		return NULL;
+
+	if (!entry)
+		goto exit_put;
+
+	entry->nr = 0;
+
+	if (!user_mode(regs)) {
+		perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+		perf_callchain_kernel(entry, regs);
+		if (current->mm)
+			regs = task_pt_regs(current);
+		else
+			regs = NULL;
+	}
+
+	if (regs) {
+		perf_callchain_store(entry, PERF_CONTEXT_USER);
+		perf_callchain_user(entry, regs);
+	}
+
+exit_put:
+	put_callchain_entry(rctx);
+
+	return entry;
+}
+
+/*
  * Initialize the perf_event context in a task_struct:
  */
 static void
@@ -1895,6 +2105,8 @@ static void free_event(struct perf_event *event)
 			atomic_dec(&nr_comm_events);
 		if (event->attr.task)
 			atomic_dec(&nr_task_events);
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
 	}
 
 	if (event->buffer) {
@@ -2937,55 +3149,6 @@ void perf_event_do_pending(void)
 	__perf_pending_run();
 }
 
-DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
-
-/*
- * Callchain support -- arch specific
- */
-
-__weak struct perf_callchain_entry *perf_callchain_buffer(void)
-{
-	return &__get_cpu_var(perf_callchain_entry);
-}
-
-__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
-				  struct pt_regs *regs)
-{
-}
-
-__weak void perf_callchain_user(struct perf_callchain_entry *entry,
-				struct pt_regs *regs)
-{
-}
-
-static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry;
-
-	entry = perf_callchain_buffer();
-	if (!entry)
-		return NULL;
-
-	entry->nr = 0;
-
-	if (!user_mode(regs)) {
-		perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
-		perf_callchain_kernel(entry, regs);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-
-	if (regs) {
-		perf_callchain_store(entry, PERF_CONTEXT_USER);
-		perf_callchain_user(entry, regs);
-	}
-
-	return entry;
-}
-
-
 /*
  * We assume there is only KVM supporting the callbacks.
  * Later on, we might change it to a list if there is
@@ -3480,14 +3643,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 
+	/* protect the callchain buffers */
+	rcu_read_lock();
+
 	perf_prepare_sample(&header, data, event, regs);
 
 	if (perf_output_begin(&handle, event, header.size, nmi, 1))
-		return;
+		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
 
 	perf_output_end(&handle);
+
+exit:
+	rcu_read_unlock();
 }
 
 /*
@@ -4243,32 +4412,16 @@ end:
 int perf_swevent_get_recursion_context(void)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	int rctx;
 
-	if (in_nmi())
-		rctx = 3;
-	else if (in_irq())
-		rctx = 2;
-	else if (in_softirq())
-		rctx = 1;
-	else
-		rctx = 0;
-
-	if (cpuctx->recursion[rctx])
-		return -1;
-
-	cpuctx->recursion[rctx]++;
-	barrier();
-
-	return rctx;
+	return get_recursion_context(cpuctx->recursion);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
 void inline perf_swevent_put_recursion_context(int rctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	barrier();
-	cpuctx->recursion[rctx]--;
+
+	put_recursion_context(cpuctx->recursion, rctx);
 }
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4968,6 +5121,13 @@ done:
 			atomic_inc(&nr_comm_events);
 		if (event->attr.task)
 			atomic_inc(&nr_task_events);
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+			err = get_callchain_buffers();
+			if (err) {
+				free_event(event);
+				return ERR_PTR(err);
+			}
+		}
 	}
 
 	return event;