author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2009-09-21 10:08:49 -0400
committer  Ingo Molnar <mingo@elte.hu>                2009-10-06 08:21:50 -0400
commit     906010b2134e14a2e377decbadd357b3d0ab9c6a (patch)
tree       598b30d08f5ca8df1e00abc295b120fa1bd2c2e2 /kernel
parent     e13dbd7d75d1ecc315c6e3071b3c4e8fba4f6bec (diff)
perf_event: Provide vmalloc() based mmap() backing
Some architectures such as Sparc, ARM and MIPS (basically everything with
flush_dcache_page()) need to deal with dcache aliases by carefully placing
pages in both kernel and user maps. These architectures typically have to
use vmalloc_user() for this.

However, on other architectures, vmalloc() is not needed and has the
downsides of being more restricted and slower than regular allocations.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: David Miller <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1254830228.21044.272.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
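The vmalloc path added by this patch leans on two existing kernel primitives: vmalloc_user(), which returns a zeroed, virtually contiguous buffer that is safe to map into user space, and vmalloc_to_page(), which resolves an address inside that buffer to its backing struct page at fault time. A minimal sketch of that pattern, separate from the patch itself (the demo_* helpers are illustrative names, not part of the kernel or of this change):

```c
#include <linux/vmalloc.h>
#include <linux/mm.h>

/*
 * Illustrative only: allocate a user-mappable buffer laid out like the
 * vmalloc-backed perf buffer, i.e. one control page followed by
 * nr_pages data pages in a single contiguous allocation.
 */
static void *demo_buf_alloc(int nr_pages)
{
	/* vmalloc_user() zeroes the memory and marks the area VM_USERMAP. */
	return vmalloc_user((nr_pages + 1) * PAGE_SIZE);
}

/*
 * Resolve the struct page backing a given page offset, the same lookup
 * the vmalloc variant of perf_mmap_to_page() performs in the fault path.
 */
static struct page *demo_buf_page(void *buf, unsigned long pgoff)
{
	return vmalloc_to_page(buf + pgoff * PAGE_SIZE);
}

static void demo_buf_free(void *buf)
{
	vfree(buf);
}
```

Because the whole buffer comes from one vmalloc_user() call, every page shares a single kernel-side virtual mapping, which is what lets architectures with aliasing d-caches keep the kernel and user views coherent.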
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/perf_event.c | 248
1 file changed, 186 insertions(+), 62 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e491fb087939..9d0b5c665883 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -20,6 +20,7 @@
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
 #include <linux/vmstat.h>
+#include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
@@ -2091,49 +2092,31 @@ unlock:
 	rcu_read_unlock();
 }
 
-static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static unsigned long perf_data_size(struct perf_mmap_data *data)
 {
-	struct perf_event *event = vma->vm_file->private_data;
-	struct perf_mmap_data *data;
-	int ret = VM_FAULT_SIGBUS;
-
-	if (vmf->flags & FAULT_FLAG_MKWRITE) {
-		if (vmf->pgoff == 0)
-			ret = 0;
-		return ret;
-	}
-
-	rcu_read_lock();
-	data = rcu_dereference(event->data);
-	if (!data)
-		goto unlock;
-
-	if (vmf->pgoff == 0) {
-		vmf->page = virt_to_page(data->user_page);
-	} else {
-		int nr = vmf->pgoff - 1;
-
-		if ((unsigned)nr > data->nr_pages)
-			goto unlock;
+	return data->nr_pages << (PAGE_SHIFT + data->data_order);
+}
 
-		if (vmf->flags & FAULT_FLAG_WRITE)
-			goto unlock;
+#ifndef CONFIG_PERF_USE_VMALLOC
 
-		vmf->page = virt_to_page(data->data_pages[nr]);
-	}
+/*
+ * Back perf_mmap() with regular GFP_KERNEL-0 pages.
+ */
 
-	get_page(vmf->page);
-	vmf->page->mapping = vma->vm_file->f_mapping;
-	vmf->page->index = vmf->pgoff;
+static struct page *
+perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+{
+	if (pgoff > data->nr_pages)
+		return NULL;
 
-	ret = 0;
-unlock:
-	rcu_read_unlock();
+	if (pgoff == 0)
+		return virt_to_page(data->user_page);
 
-	return ret;
-}
+	return virt_to_page(data->data_pages[pgoff - 1]);
+}
 
-static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 {
 	struct perf_mmap_data *data;
 	unsigned long size;
@@ -2158,19 +2141,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 			goto fail_data_pages;
 	}
 
+	data->data_order = 0;
 	data->nr_pages = nr_pages;
-	atomic_set(&data->lock, -1);
-
-	if (event->attr.watermark) {
-		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
-					event->attr.wakeup_watermark);
-	}
-	if (!data->watermark)
-		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
 
-	rcu_assign_pointer(event->data, data);
-
-	return 0;
+	return data;
 
 fail_data_pages:
 	for (i--; i >= 0; i--)
@@ -2182,7 +2156,7 @@ fail_user_page:
 	kfree(data);
 
 fail:
-	return -ENOMEM;
+	return NULL;
 }
 
 static void perf_mmap_free_page(unsigned long addr)
@@ -2193,28 +2167,169 @@ static void perf_mmap_free_page(unsigned long addr)
 	__free_page(page);
 }
 
-static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+static void perf_mmap_data_free(struct perf_mmap_data *data)
 {
-	struct perf_mmap_data *data;
 	int i;
 
-	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-
 	perf_mmap_free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
 		perf_mmap_free_page((unsigned long)data->data_pages[i]);
+}
+
+#else
+
+/*
+ * Back perf_mmap() with vmalloc memory.
+ *
+ * Required for architectures that have d-cache aliasing issues.
+ */
+
+static struct page *
+perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+{
+	if (pgoff > (1UL << data->data_order))
+		return NULL;
+
+	return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+}
+
+static void perf_mmap_unmark_page(void *addr)
+{
+	struct page *page = vmalloc_to_page(addr);
+
+	page->mapping = NULL;
+}
+
+static void perf_mmap_data_free_work(struct work_struct *work)
+{
+	struct perf_mmap_data *data;
+	void *base;
+	int i, nr;
+
+	data = container_of(work, struct perf_mmap_data, work);
+	nr = 1 << data->data_order;
+
+	base = data->user_page;
+	for (i = 0; i < nr + 1; i++)
+		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
+
+	vfree(base);
+}
+
+static void perf_mmap_data_free(struct perf_mmap_data *data)
+{
+	schedule_work(&data->work);
+}
+
+static struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	void *all_buf;
 
+	WARN_ON(atomic_read(&event->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	INIT_WORK(&data->work, perf_mmap_data_free_work);
+
+	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
+	if (!all_buf)
+		goto fail_all_buf;
+
+	data->user_page = all_buf;
+	data->data_pages[0] = all_buf + PAGE_SIZE;
+	data->data_order = ilog2(nr_pages);
+	data->nr_pages = 1;
+
+	return data;
+
+fail_all_buf:
+	kfree(data);
+
+fail:
+	return NULL;
+}
+
+#endif
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_event *event = vma->vm_file->private_data;
+	struct perf_mmap_data *data;
+	int ret = VM_FAULT_SIGBUS;
+
+	if (vmf->flags & FAULT_FLAG_MKWRITE) {
+		if (vmf->pgoff == 0)
+			ret = 0;
+		return ret;
+	}
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (!data)
+		goto unlock;
+
+	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
+		goto unlock;
+
+	vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+	if (!vmf->page)
+		goto unlock;
+
+	get_page(vmf->page);
+	vmf->page->mapping = vma->vm_file->f_mapping;
+	vmf->page->index = vmf->pgoff;
+
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static void
+perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
+{
+	long max_size = perf_data_size(data);
+
+	atomic_set(&data->lock, -1);
+
+	if (event->attr.watermark) {
+		data->watermark = min_t(long, max_size,
+					event->attr.wakeup_watermark);
+	}
+
+	if (!data->watermark)
+		data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+
+
+	rcu_assign_pointer(event->data, data);
+}
+
+static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data;
+
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+	perf_mmap_data_free(data);
 	kfree(data);
 }
 
-static void perf_mmap_data_free(struct perf_event *event)
+static void perf_mmap_data_release(struct perf_event *event)
 {
 	struct perf_mmap_data *data = event->data;
 
 	WARN_ON(atomic_read(&event->mmap_count));
 
 	rcu_assign_pointer(event->data, NULL);
-	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+	call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2230,11 +2345,12 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
+		unsigned long size = perf_data_size(event->data);
 		struct user_struct *user = current_user();
 
-		atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm);
+		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= event->data->nr_locked;
-		perf_mmap_data_free(event);
+		perf_mmap_data_release(event);
 		mutex_unlock(&event->mmap_mutex);
 	}
 }
@@ -2252,6 +2368,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
+	struct perf_mmap_data *data;
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra, extra;
@@ -2314,10 +2431,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	WARN_ON(event->data);
-	ret = perf_mmap_data_alloc(event, nr_pages);
-	if (ret)
+
+	data = perf_mmap_data_alloc(event, nr_pages);
+	ret = -ENOMEM;
+	if (!data)
 		goto unlock;
 
+	ret = 0;
+	perf_mmap_data_init(event, data);
+
 	atomic_set(&event->mmap_count, 1);
 	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
@@ -2505,7 +2627,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
 	if (!data->writable)
 		return true;
 
-	mask = (data->nr_pages << PAGE_SHIFT) - 1;
+	mask = perf_data_size(data) - 1;
 
 	offset = (offset - tail) & mask;
 	head = (head - tail) & mask;
@@ -2610,7 +2732,7 @@ void perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
 	unsigned int pages_mask;
-	unsigned int offset;
+	unsigned long offset;
 	unsigned int size;
 	void **pages;
 
@@ -2619,12 +2741,14 @@ void perf_output_copy(struct perf_output_handle *handle,
 	pages = handle->data->data_pages;
 
 	do {
-		unsigned int page_offset;
+		unsigned long page_offset;
+		unsigned long page_size;
 		int nr;
 
 		nr = (offset >> PAGE_SHIFT) & pages_mask;
-		page_offset = offset & (PAGE_SIZE - 1);
-		size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+		page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
+		page_offset = offset & (page_size - 1);
+		size = min_t(unsigned int, page_size - page_offset, len);
 
 		memcpy(pages[nr] + page_offset, buf, size);
 