author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-09-21 10:08:49 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-10-06 08:21:50 -0400
commit		906010b2134e14a2e377decbadd357b3d0ab9c6a (patch)
tree		598b30d08f5ca8df1e00abc295b120fa1bd2c2e2 /kernel/perf_event.c
parent		e13dbd7d75d1ecc315c6e3071b3c4e8fba4f6bec (diff)
perf_event: Provide vmalloc() based mmap() backing
Some architectures such as Sparc, ARM and MIPS (basically
everything with flush_dcache_page()) need to deal with dcache
aliases by carefully placing pages in both kernel and user maps.
These architectures typically have to use vmalloc_user() for this.
However, on other architectures, vmalloc() is not needed and has
the downsides of being more restricted and slower than regular
allocations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: David Miller <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1254830228.21044.272.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--	kernel/perf_event.c | 248
1 file changed, 186 insertions(+), 62 deletions(-)
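For orientation before reading the diff: the patch introduces two alternative backing strategies for the perf mmap() buffer, selected at build time by the new CONFIG_PERF_USE_VMALLOC symbol. The snippet below is only a simplified sketch of that idea, not the patch code; the sketch_* helper names are invented for illustration, while the real implementations are perf_mmap_data_alloc() and perf_mmap_to_page() in the hunks that follow.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#ifndef CONFIG_PERF_USE_VMALLOC
/* No d-cache aliasing concerns: back each buffer page with an
 * individually allocated page and resolve offsets via the direct mapping. */
static void *sketch_alloc_page(void)
{
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static struct page *sketch_addr_to_page(void *kaddr)
{
	return virt_to_page(kaddr);
}
#else
/* Aliasing architectures (Sparc, ARM, MIPS, ...): allocate the whole
 * buffer with vmalloc_user() so it can be mapped into user space
 * coherently, and resolve faulting offsets through the vmalloc
 * page tables. */
static void *sketch_alloc_buffer(int nr_pages)
{
	return vmalloc_user((nr_pages + 1) * PAGE_SIZE);	/* +1 control page */
}

static struct page *sketch_addr_to_page(void *vaddr)
{
	return vmalloc_to_page(vaddr);
}
#endif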
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e491fb087939..9d0b5c665883 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -20,6 +20,7 @@
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
 #include <linux/vmstat.h>
+#include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
@@ -2091,49 +2092,31 @@ unlock:
 	rcu_read_unlock();
 }
 
-static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static unsigned long perf_data_size(struct perf_mmap_data *data)
 {
-	struct perf_event *event = vma->vm_file->private_data;
-	struct perf_mmap_data *data;
-	int ret = VM_FAULT_SIGBUS;
-
-	if (vmf->flags & FAULT_FLAG_MKWRITE) {
-		if (vmf->pgoff == 0)
-			ret = 0;
-		return ret;
-	}
-
-	rcu_read_lock();
-	data = rcu_dereference(event->data);
-	if (!data)
-		goto unlock;
-
-	if (vmf->pgoff == 0) {
-		vmf->page = virt_to_page(data->user_page);
-	} else {
-		int nr = vmf->pgoff - 1;
-
-		if ((unsigned)nr > data->nr_pages)
-			goto unlock;
+	return data->nr_pages << (PAGE_SHIFT + data->data_order);
+}
 
-		if (vmf->flags & FAULT_FLAG_WRITE)
-			goto unlock;
+#ifndef CONFIG_PERF_USE_VMALLOC
 
-		vmf->page = virt_to_page(data->data_pages[nr]);
-	}
+/*
+ * Back perf_mmap() with regular GFP_KERNEL-0 pages.
+ */
 
-	get_page(vmf->page);
-	vmf->page->mapping = vma->vm_file->f_mapping;
-	vmf->page->index = vmf->pgoff;
+static struct page *
+perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+{
+	if (pgoff > data->nr_pages)
+		return NULL;
 
-	ret = 0;
-unlock:
-	rcu_read_unlock();
+	if (pgoff == 0)
+		return virt_to_page(data->user_page);
 
-	return ret;
+	return virt_to_page(data->data_pages[pgoff - 1]);
 }
 
-static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 {
 	struct perf_mmap_data *data;
 	unsigned long size;
@@ -2158,19 +2141,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 			goto fail_data_pages;
 	}
 
+	data->data_order = 0;
 	data->nr_pages = nr_pages;
-	atomic_set(&data->lock, -1);
-
-	if (event->attr.watermark) {
-		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
-					event->attr.wakeup_watermark);
-	}
-	if (!data->watermark)
-		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
 
-	rcu_assign_pointer(event->data, data);
-
-	return 0;
+	return data;
 
 fail_data_pages:
 	for (i--; i >= 0; i--)
@@ -2182,7 +2156,7 @@ fail_user_page:
 	kfree(data);
 
 fail:
-	return -ENOMEM;
+	return NULL;
 }
 
 static void perf_mmap_free_page(unsigned long addr)
@@ -2193,28 +2167,169 @@ static void perf_mmap_free_page(unsigned long addr)
 	__free_page(page);
 }
 
-static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+static void perf_mmap_data_free(struct perf_mmap_data *data)
 {
-	struct perf_mmap_data *data;
 	int i;
 
-	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-
 	perf_mmap_free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
 		perf_mmap_free_page((unsigned long)data->data_pages[i]);
+}
+
+#else
+
+/*
+ * Back perf_mmap() with vmalloc memory.
+ *
+ * Required for architectures that have d-cache aliasing issues.
+ */
+
+static struct page *
+perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+{
+	if (pgoff > (1UL << data->data_order))
+		return NULL;
+
+	return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+}
+
+static void perf_mmap_unmark_page(void *addr)
+{
+	struct page *page = vmalloc_to_page(addr);
+
+	page->mapping = NULL;
+}
+
+static void perf_mmap_data_free_work(struct work_struct *work)
+{
+	struct perf_mmap_data *data;
+	void *base;
+	int i, nr;
+
+	data = container_of(work, struct perf_mmap_data, work);
+	nr = 1 << data->data_order;
+
+	base = data->user_page;
+	for (i = 0; i < nr + 1; i++)
+		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
+
+	vfree(base);
+}
+
+static void perf_mmap_data_free(struct perf_mmap_data *data)
+{
+	schedule_work(&data->work);
+}
+
+static struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	void *all_buf;
 
+	WARN_ON(atomic_read(&event->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	INIT_WORK(&data->work, perf_mmap_data_free_work);
+
+	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
+	if (!all_buf)
+		goto fail_all_buf;
+
+	data->user_page = all_buf;
+	data->data_pages[0] = all_buf + PAGE_SIZE;
+	data->data_order = ilog2(nr_pages);
+	data->nr_pages = 1;
+
+	return data;
+
+fail_all_buf:
+	kfree(data);
+
+fail:
+	return NULL;
+}
+
+#endif
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_event *event = vma->vm_file->private_data;
+	struct perf_mmap_data *data;
+	int ret = VM_FAULT_SIGBUS;
+
+	if (vmf->flags & FAULT_FLAG_MKWRITE) {
+		if (vmf->pgoff == 0)
+			ret = 0;
+		return ret;
+	}
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (!data)
+		goto unlock;
+
+	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
+		goto unlock;
+
+	vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+	if (!vmf->page)
+		goto unlock;
+
+	get_page(vmf->page);
+	vmf->page->mapping = vma->vm_file->f_mapping;
+	vmf->page->index = vmf->pgoff;
+
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static void
+perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
+{
+	long max_size = perf_data_size(data);
+
+	atomic_set(&data->lock, -1);
+
+	if (event->attr.watermark) {
+		data->watermark = min_t(long, max_size,
+					event->attr.wakeup_watermark);
+	}
+
+	if (!data->watermark)
+		data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+
+
+	rcu_assign_pointer(event->data, data);
+}
+
+static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data;
+
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+	perf_mmap_data_free(data);
 	kfree(data);
 }
 
-static void perf_mmap_data_free(struct perf_event *event)
+static void perf_mmap_data_release(struct perf_event *event)
 {
 	struct perf_mmap_data *data = event->data;
 
 	WARN_ON(atomic_read(&event->mmap_count));
 
 	rcu_assign_pointer(event->data, NULL);
-	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+	call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2230,11 +2345,12 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
+		unsigned long size = perf_data_size(event->data);
 		struct user_struct *user = current_user();
 
-		atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm);
+		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= event->data->nr_locked;
-		perf_mmap_data_free(event);
+		perf_mmap_data_release(event);
 		mutex_unlock(&event->mmap_mutex);
 	}
 }
@@ -2252,6 +2368,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
+	struct perf_mmap_data *data;
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra, extra;
@@ -2314,10 +2431,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	WARN_ON(event->data);
-	ret = perf_mmap_data_alloc(event, nr_pages);
-	if (ret)
+
+	data = perf_mmap_data_alloc(event, nr_pages);
+	ret = -ENOMEM;
+	if (!data)
 		goto unlock;
 
+	ret = 0;
+	perf_mmap_data_init(event, data);
+
 	atomic_set(&event->mmap_count, 1);
 	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
@@ -2505,7 +2627,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
 	if (!data->writable)
 		return true;
 
-	mask = (data->nr_pages << PAGE_SHIFT) - 1;
+	mask = perf_data_size(data) - 1;
 
 	offset = (offset - tail) & mask;
 	head = (head - tail) & mask;
@@ -2610,7 +2732,7 @@ void perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
 	unsigned int pages_mask;
-	unsigned int offset;
+	unsigned long offset;
 	unsigned int size;
 	void **pages;
 
@@ -2619,12 +2741,14 @@ void perf_output_copy(struct perf_output_handle *handle,
 	pages = handle->data->data_pages;
 
 	do {
-		unsigned int page_offset;
+		unsigned long page_offset;
+		unsigned long page_size;
 		int nr;
 
 		nr = (offset >> PAGE_SHIFT) & pages_mask;
-		page_offset = offset & (PAGE_SIZE - 1);
-		size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+		page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
+		page_offset = offset & (page_size - 1);
+		size = min_t(unsigned int, page_size - page_offset, len);
 
 		memcpy(pages[nr] + page_offset, buf, size);
 