aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/cpu.c2
-rw-r--r--kernel/exec_domain.c18
-rw-r--r--kernel/module.c7
-rw-r--r--kernel/perf_event.c351
-rw-r--r--kernel/sched.c24
-rw-r--r--kernel/sched_fair.c22
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/stop_machine.c2
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/blktrace.c2
-rw-r--r--kernel/trace/trace_event_perf.c15
-rw-r--r--kernel/trace/trace_kprobe.c4
-rw-r--r--kernel/trace/trace_syscalls.c4
14 files changed, 275 insertions, 182 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 422cb19f156e..3ac6f5b0a64b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4598,7 +4598,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4598 parent_css = parent->subsys[subsys_id]; 4598 parent_css = parent->subsys[subsys_id];
4599 child_css = child->subsys[subsys_id]; 4599 child_css = child->subsys[subsys_id];
4600 parent_id = parent_css->id; 4600 parent_id = parent_css->id;
4601 depth = parent_id->depth; 4601 depth = parent_id->depth + 1;
4602 4602
4603 child_id = get_new_cssid(ss, depth); 4603 child_id = get_new_cssid(ss, depth);
4604 if (IS_ERR(child_id)) 4604 if (IS_ERR(child_id))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8b92539b4754..97d1b426a4ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,7 +34,7 @@ void cpu_maps_update_done(void)
34 mutex_unlock(&cpu_add_remove_lock); 34 mutex_unlock(&cpu_add_remove_lock);
35} 35}
36 36
37static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); 37static RAW_NOTIFIER_HEAD(cpu_chain);
38 38
39/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. 39/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
40 * Should always be manipulated under cpu_add_remove_lock 40 * Should always be manipulated under cpu_add_remove_lock
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c35452cadded..dd62f8e714ca 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,7 +27,7 @@ static struct exec_domain *exec_domains = &default_exec_domain;
27static DEFINE_RWLOCK(exec_domains_lock); 27static DEFINE_RWLOCK(exec_domains_lock);
28 28
29 29
30static u_long ident_map[32] = { 30static unsigned long ident_map[32] = {
31 0, 1, 2, 3, 4, 5, 6, 7, 31 0, 1, 2, 3, 4, 5, 6, 7,
32 8, 9, 10, 11, 12, 13, 14, 15, 32 8, 9, 10, 11, 12, 13, 14, 15,
33 16, 17, 18, 19, 20, 21, 22, 23, 33 16, 17, 18, 19, 20, 21, 22, 23,
@@ -56,10 +56,10 @@ default_handler(int segment, struct pt_regs *regp)
56} 56}
57 57
58static struct exec_domain * 58static struct exec_domain *
59lookup_exec_domain(u_long personality) 59lookup_exec_domain(unsigned int personality)
60{ 60{
61 struct exec_domain * ep; 61 unsigned int pers = personality(personality);
62 u_long pers = personality(personality); 62 struct exec_domain *ep;
63 63
64 read_lock(&exec_domains_lock); 64 read_lock(&exec_domains_lock);
65 for (ep = exec_domains; ep; ep = ep->next) { 65 for (ep = exec_domains; ep; ep = ep->next) {
@@ -70,7 +70,7 @@ lookup_exec_domain(u_long personality)
70 70
71#ifdef CONFIG_MODULES 71#ifdef CONFIG_MODULES
72 read_unlock(&exec_domains_lock); 72 read_unlock(&exec_domains_lock);
73 request_module("personality-%ld", pers); 73 request_module("personality-%d", pers);
74 read_lock(&exec_domains_lock); 74 read_lock(&exec_domains_lock);
75 75
76 for (ep = exec_domains; ep; ep = ep->next) { 76 for (ep = exec_domains; ep; ep = ep->next) {
@@ -135,7 +135,7 @@ unregister:
135} 135}
136 136
137int 137int
138__set_personality(u_long personality) 138__set_personality(unsigned int personality)
139{ 139{
140 struct exec_domain *ep, *oep; 140 struct exec_domain *ep, *oep;
141 141
@@ -188,9 +188,9 @@ static int __init proc_execdomains_init(void)
188module_init(proc_execdomains_init); 188module_init(proc_execdomains_init);
189#endif 189#endif
190 190
191SYSCALL_DEFINE1(personality, u_long, personality) 191SYSCALL_DEFINE1(personality, unsigned int, personality)
192{ 192{
193 u_long old = current->personality; 193 unsigned int old = current->personality;
194 194
195 if (personality != 0xffffffff) { 195 if (personality != 0xffffffff) {
196 set_personality(personality); 196 set_personality(personality);
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(personality, u_long, personality)
198 return -EINVAL; 198 return -EINVAL;
199 } 199 }
200 200
201 return (long)old; 201 return old;
202} 202}
203 203
204 204
diff --git a/kernel/module.c b/kernel/module.c
index 333fbcc96978..0129769301e3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -403,7 +403,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
403 Elf_Shdr *sechdrs, 403 Elf_Shdr *sechdrs,
404 const char *secstrings) 404 const char *secstrings)
405{ 405{
406 return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); 406 return find_sec(hdr, sechdrs, secstrings, ".data..percpu");
407} 407}
408 408
409static void percpu_modcopy(struct module *mod, 409static void percpu_modcopy(struct module *mod,
@@ -2014,6 +2014,7 @@ static noinline struct module *load_module(void __user *umod,
2014 long err = 0; 2014 long err = 0;
2015 void *ptr = NULL; /* Stops spurious gcc warning */ 2015 void *ptr = NULL; /* Stops spurious gcc warning */
2016 unsigned long symoffs, stroffs, *strmap; 2016 unsigned long symoffs, stroffs, *strmap;
2017 void __percpu *percpu;
2017 2018
2018 mm_segment_t old_fs; 2019 mm_segment_t old_fs;
2019 2020
@@ -2158,6 +2159,8 @@ static noinline struct module *load_module(void __user *umod,
2158 goto free_mod; 2159 goto free_mod;
2159 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2160 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2160 } 2161 }
2162 /* Keep this around for failure path. */
2163 percpu = mod_percpu(mod);
2161 2164
2162 /* Determine total sizes, and put offsets in sh_entsize. For now 2165 /* Determine total sizes, and put offsets in sh_entsize. For now
2163 this is done generically; there doesn't appear to be any 2166 this is done generically; there doesn't appear to be any
@@ -2463,7 +2466,7 @@ static noinline struct module *load_module(void __user *umod,
2463 module_free(mod, mod->module_core); 2466 module_free(mod, mod->module_core);
2464 /* mod will be freed with core. Don't access it beyond this line! */ 2467 /* mod will be freed with core. Don't access it beyond this line! */
2465 free_percpu: 2468 free_percpu:
2466 percpu_modfree(mod); 2469 free_percpu(percpu);
2467 free_mod: 2470 free_mod:
2468 kfree(args); 2471 kfree(args);
2469 kfree(strmap); 2472 kfree(strmap);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bd7ce8ca5bb9..31d6afe92594 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
283static void 283static void
284list_add_event(struct perf_event *event, struct perf_event_context *ctx) 284list_add_event(struct perf_event *event, struct perf_event_context *ctx)
285{ 285{
286 struct perf_event *group_leader = event->group_leader; 286 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
287 event->attach_state |= PERF_ATTACH_CONTEXT;
287 288
288 /* 289 /*
289 * Depending on whether it is a standalone or sibling event, 290 * If we're a stand alone event or group leader, we go to the context
290 * add it straight to the context's event list, or to the group 291 * list, group events are kept attached to the group so that
291 * leader's sibling list: 292 * perf_group_detach can, at all times, locate all siblings.
292 */ 293 */
293 if (group_leader == event) { 294 if (event->group_leader == event) {
294 struct list_head *list; 295 struct list_head *list;
295 296
296 if (is_software_event(event)) 297 if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
298 299
299 list = ctx_group_list(event, ctx); 300 list = ctx_group_list(event, ctx);
300 list_add_tail(&event->group_entry, list); 301 list_add_tail(&event->group_entry, list);
301 } else {
302 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
303 !is_software_event(event))
304 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
305
306 list_add_tail(&event->group_entry, &group_leader->sibling_list);
307 group_leader->nr_siblings++;
308 } 302 }
309 303
310 list_add_rcu(&event->event_entry, &ctx->event_list); 304 list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
313 ctx->nr_stat++; 307 ctx->nr_stat++;
314} 308}
315 309
310static void perf_group_attach(struct perf_event *event)
311{
312 struct perf_event *group_leader = event->group_leader;
313
314 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
315 event->attach_state |= PERF_ATTACH_GROUP;
316
317 if (group_leader == event)
318 return;
319
320 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
321 !is_software_event(event))
322 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
323
324 list_add_tail(&event->group_entry, &group_leader->sibling_list);
325 group_leader->nr_siblings++;
326}
327
316/* 328/*
317 * Remove a event from the lists for its context. 329 * Remove a event from the lists for its context.
318 * Must be called with ctx->mutex and ctx->lock held. 330 * Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
320static void 332static void
321list_del_event(struct perf_event *event, struct perf_event_context *ctx) 333list_del_event(struct perf_event *event, struct perf_event_context *ctx)
322{ 334{
323 if (list_empty(&event->group_entry)) 335 /*
336 * We can have double detach due to exit/hot-unplug + close.
337 */
338 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
324 return; 339 return;
340
341 event->attach_state &= ~PERF_ATTACH_CONTEXT;
342
325 ctx->nr_events--; 343 ctx->nr_events--;
326 if (event->attr.inherit_stat) 344 if (event->attr.inherit_stat)
327 ctx->nr_stat--; 345 ctx->nr_stat--;
328 346
329 list_del_init(&event->group_entry);
330 list_del_rcu(&event->event_entry); 347 list_del_rcu(&event->event_entry);
331 348
332 if (event->group_leader != event) 349 if (event->group_leader == event)
333 event->group_leader->nr_siblings--; 350 list_del_init(&event->group_entry);
334 351
335 update_group_times(event); 352 update_group_times(event);
336 353
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
345 event->state = PERF_EVENT_STATE_OFF; 362 event->state = PERF_EVENT_STATE_OFF;
346} 363}
347 364
348static void 365static void perf_group_detach(struct perf_event *event)
349perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
350{ 366{
351 struct perf_event *sibling, *tmp; 367 struct perf_event *sibling, *tmp;
368 struct list_head *list = NULL;
369
370 /*
371 * We can have double detach due to exit/hot-unplug + close.
372 */
373 if (!(event->attach_state & PERF_ATTACH_GROUP))
374 return;
375
376 event->attach_state &= ~PERF_ATTACH_GROUP;
377
378 /*
379 * If this is a sibling, remove it from its group.
380 */
381 if (event->group_leader != event) {
382 list_del_init(&event->group_entry);
383 event->group_leader->nr_siblings--;
384 return;
385 }
386
387 if (!list_empty(&event->group_entry))
388 list = &event->group_entry;
352 389
353 /* 390 /*
354 * If this was a group event with sibling events then 391 * If this was a group event with sibling events then
355 * upgrade the siblings to singleton events by adding them 392 * upgrade the siblings to singleton events by adding them
356 * to the context list directly: 393 * to whatever list we are on.
357 */ 394 */
358 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { 395 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
359 struct list_head *list; 396 if (list)
360 397 list_move_tail(&sibling->group_entry, list);
361 list = ctx_group_list(event, ctx);
362 list_move_tail(&sibling->group_entry, list);
363 sibling->group_leader = sibling; 398 sibling->group_leader = sibling;
364 399
365 /* Inherit group flags from the previous leader */ 400 /* Inherit group flags from the previous leader */
@@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event,
652 if (txn) 687 if (txn)
653 pmu->start_txn(pmu); 688 pmu->start_txn(pmu);
654 689
655 if (event_sched_in(group_event, cpuctx, ctx)) 690 if (event_sched_in(group_event, cpuctx, ctx)) {
691 if (txn)
692 pmu->cancel_txn(pmu);
656 return -EAGAIN; 693 return -EAGAIN;
694 }
657 695
658 /* 696 /*
659 * Schedule in siblings as one group (if any): 697 * Schedule in siblings as one group (if any):
@@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event,
675 } 713 }
676 714
677group_error: 715group_error:
678 if (txn)
679 pmu->cancel_txn(pmu);
680
681 /* 716 /*
682 * Groups can be scheduled in as one unit only, so undo any 717 * Groups can be scheduled in as one unit only, so undo any
683 * partial group before returning: 718 * partial group before returning:
@@ -689,6 +724,9 @@ group_error:
689 } 724 }
690 event_sched_out(group_event, cpuctx, ctx); 725 event_sched_out(group_event, cpuctx, ctx);
691 726
727 if (txn)
728 pmu->cancel_txn(pmu);
729
692 return -EAGAIN; 730 return -EAGAIN;
693} 731}
694 732
@@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event,
727 struct perf_event_context *ctx) 765 struct perf_event_context *ctx)
728{ 766{
729 list_add_event(event, ctx); 767 list_add_event(event, ctx);
768 perf_group_attach(event);
730 event->tstamp_enabled = ctx->time; 769 event->tstamp_enabled = ctx->time;
731 event->tstamp_running = ctx->time; 770 event->tstamp_running = ctx->time;
732 event->tstamp_stopped = ctx->time; 771 event->tstamp_stopped = ctx->time;
@@ -1841,6 +1880,7 @@ static void free_event_rcu(struct rcu_head *head)
1841} 1880}
1842 1881
1843static void perf_pending_sync(struct perf_event *event); 1882static void perf_pending_sync(struct perf_event *event);
1883static void perf_mmap_data_put(struct perf_mmap_data *data);
1844 1884
1845static void free_event(struct perf_event *event) 1885static void free_event(struct perf_event *event)
1846{ 1886{
@@ -1856,9 +1896,9 @@ static void free_event(struct perf_event *event)
1856 atomic_dec(&nr_task_events); 1896 atomic_dec(&nr_task_events);
1857 } 1897 }
1858 1898
1859 if (event->output) { 1899 if (event->data) {
1860 fput(event->output->filp); 1900 perf_mmap_data_put(event->data);
1861 event->output = NULL; 1901 event->data = NULL;
1862 } 1902 }
1863 1903
1864 if (event->destroy) 1904 if (event->destroy)
@@ -1893,8 +1933,8 @@ int perf_event_release_kernel(struct perf_event *event)
1893 */ 1933 */
1894 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 1934 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
1895 raw_spin_lock_irq(&ctx->lock); 1935 raw_spin_lock_irq(&ctx->lock);
1936 perf_group_detach(event);
1896 list_del_event(event, ctx); 1937 list_del_event(event, ctx);
1897 perf_destroy_group(event, ctx);
1898 raw_spin_unlock_irq(&ctx->lock); 1938 raw_spin_unlock_irq(&ctx->lock);
1899 mutex_unlock(&ctx->mutex); 1939 mutex_unlock(&ctx->mutex);
1900 1940
@@ -2175,7 +2215,27 @@ unlock:
2175 return ret; 2215 return ret;
2176} 2216}
2177 2217
2178static int perf_event_set_output(struct perf_event *event, int output_fd); 2218static const struct file_operations perf_fops;
2219
2220static struct perf_event *perf_fget_light(int fd, int *fput_needed)
2221{
2222 struct file *file;
2223
2224 file = fget_light(fd, fput_needed);
2225 if (!file)
2226 return ERR_PTR(-EBADF);
2227
2228 if (file->f_op != &perf_fops) {
2229 fput_light(file, *fput_needed);
2230 *fput_needed = 0;
2231 return ERR_PTR(-EBADF);
2232 }
2233
2234 return file->private_data;
2235}
2236
2237static int perf_event_set_output(struct perf_event *event,
2238 struct perf_event *output_event);
2179static int perf_event_set_filter(struct perf_event *event, void __user *arg); 2239static int perf_event_set_filter(struct perf_event *event, void __user *arg);
2180 2240
2181static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2241static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -2202,7 +2262,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2202 return perf_event_period(event, (u64 __user *)arg); 2262 return perf_event_period(event, (u64 __user *)arg);
2203 2263
2204 case PERF_EVENT_IOC_SET_OUTPUT: 2264 case PERF_EVENT_IOC_SET_OUTPUT:
2205 return perf_event_set_output(event, arg); 2265 {
2266 struct perf_event *output_event = NULL;
2267 int fput_needed = 0;
2268 int ret;
2269
2270 if (arg != -1) {
2271 output_event = perf_fget_light(arg, &fput_needed);
2272 if (IS_ERR(output_event))
2273 return PTR_ERR(output_event);
2274 }
2275
2276 ret = perf_event_set_output(event, output_event);
2277 if (output_event)
2278 fput_light(output_event->filp, fput_needed);
2279
2280 return ret;
2281 }
2206 2282
2207 case PERF_EVENT_IOC_SET_FILTER: 2283 case PERF_EVENT_IOC_SET_FILTER:
2208 return perf_event_set_filter(event, (void __user *)arg); 2284 return perf_event_set_filter(event, (void __user *)arg);
@@ -2335,8 +2411,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2335 unsigned long size; 2411 unsigned long size;
2336 int i; 2412 int i;
2337 2413
2338 WARN_ON(atomic_read(&event->mmap_count));
2339
2340 size = sizeof(struct perf_mmap_data); 2414 size = sizeof(struct perf_mmap_data);
2341 size += nr_pages * sizeof(void *); 2415 size += nr_pages * sizeof(void *);
2342 2416
@@ -2452,8 +2526,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2452 unsigned long size; 2526 unsigned long size;
2453 void *all_buf; 2527 void *all_buf;
2454 2528
2455 WARN_ON(atomic_read(&event->mmap_count));
2456
2457 size = sizeof(struct perf_mmap_data); 2529 size = sizeof(struct perf_mmap_data);
2458 size += sizeof(void *); 2530 size += sizeof(void *);
2459 2531
@@ -2536,7 +2608,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2536 if (!data->watermark) 2608 if (!data->watermark)
2537 data->watermark = max_size / 2; 2609 data->watermark = max_size / 2;
2538 2610
2539 2611 atomic_set(&data->refcount, 1);
2540 rcu_assign_pointer(event->data, data); 2612 rcu_assign_pointer(event->data, data);
2541} 2613}
2542 2614
@@ -2548,13 +2620,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2548 perf_mmap_data_free(data); 2620 perf_mmap_data_free(data);
2549} 2621}
2550 2622
2551static void perf_mmap_data_release(struct perf_event *event) 2623static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
2552{ 2624{
2553 struct perf_mmap_data *data = event->data; 2625 struct perf_mmap_data *data;
2554 2626
2555 WARN_ON(atomic_read(&event->mmap_count)); 2627 rcu_read_lock();
2628 data = rcu_dereference(event->data);
2629 if (data) {
2630 if (!atomic_inc_not_zero(&data->refcount))
2631 data = NULL;
2632 }
2633 rcu_read_unlock();
2634
2635 return data;
2636}
2637
2638static void perf_mmap_data_put(struct perf_mmap_data *data)
2639{
2640 if (!atomic_dec_and_test(&data->refcount))
2641 return;
2556 2642
2557 rcu_assign_pointer(event->data, NULL);
2558 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2643 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
2559} 2644}
2560 2645
@@ -2569,15 +2654,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2569{ 2654{
2570 struct perf_event *event = vma->vm_file->private_data; 2655 struct perf_event *event = vma->vm_file->private_data;
2571 2656
2572 WARN_ON_ONCE(event->ctx->parent_ctx);
2573 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2657 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2574 unsigned long size = perf_data_size(event->data); 2658 unsigned long size = perf_data_size(event->data);
2575 struct user_struct *user = current_user(); 2659 struct user_struct *user = event->mmap_user;
2660 struct perf_mmap_data *data = event->data;
2576 2661
2577 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2662 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2578 vma->vm_mm->locked_vm -= event->data->nr_locked; 2663 vma->vm_mm->locked_vm -= event->mmap_locked;
2579 perf_mmap_data_release(event); 2664 rcu_assign_pointer(event->data, NULL);
2580 mutex_unlock(&event->mmap_mutex); 2665 mutex_unlock(&event->mmap_mutex);
2666
2667 perf_mmap_data_put(data);
2668 free_uid(user);
2581 } 2669 }
2582} 2670}
2583 2671
@@ -2629,13 +2717,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2629 2717
2630 WARN_ON_ONCE(event->ctx->parent_ctx); 2718 WARN_ON_ONCE(event->ctx->parent_ctx);
2631 mutex_lock(&event->mmap_mutex); 2719 mutex_lock(&event->mmap_mutex);
2632 if (event->output) { 2720 if (event->data) {
2633 ret = -EINVAL; 2721 if (event->data->nr_pages == nr_pages)
2634 goto unlock; 2722 atomic_inc(&event->data->refcount);
2635 } 2723 else
2636
2637 if (atomic_inc_not_zero(&event->mmap_count)) {
2638 if (nr_pages != event->data->nr_pages)
2639 ret = -EINVAL; 2724 ret = -EINVAL;
2640 goto unlock; 2725 goto unlock;
2641 } 2726 }
@@ -2667,21 +2752,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2667 WARN_ON(event->data); 2752 WARN_ON(event->data);
2668 2753
2669 data = perf_mmap_data_alloc(event, nr_pages); 2754 data = perf_mmap_data_alloc(event, nr_pages);
2670 ret = -ENOMEM; 2755 if (!data) {
2671 if (!data) 2756 ret = -ENOMEM;
2672 goto unlock; 2757 goto unlock;
2758 }
2673 2759
2674 ret = 0;
2675 perf_mmap_data_init(event, data); 2760 perf_mmap_data_init(event, data);
2676
2677 atomic_set(&event->mmap_count, 1);
2678 atomic_long_add(user_extra, &user->locked_vm);
2679 vma->vm_mm->locked_vm += extra;
2680 event->data->nr_locked = extra;
2681 if (vma->vm_flags & VM_WRITE) 2761 if (vma->vm_flags & VM_WRITE)
2682 event->data->writable = 1; 2762 event->data->writable = 1;
2683 2763
2764 atomic_long_add(user_extra, &user->locked_vm);
2765 event->mmap_locked = extra;
2766 event->mmap_user = get_current_user();
2767 vma->vm_mm->locked_vm += event->mmap_locked;
2768
2684unlock: 2769unlock:
2770 if (!ret)
2771 atomic_inc(&event->mmap_count);
2685 mutex_unlock(&event->mmap_mutex); 2772 mutex_unlock(&event->mmap_mutex);
2686 2773
2687 vma->vm_flags |= VM_RESERVED; 2774 vma->vm_flags |= VM_RESERVED;
@@ -2977,6 +3064,7 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
2977 3064
2978 len -= size; 3065 len -= size;
2979 handle->addr += size; 3066 handle->addr += size;
3067 buf += size;
2980 handle->size -= size; 3068 handle->size -= size;
2981 if (!handle->size) { 3069 if (!handle->size) {
2982 struct perf_mmap_data *data = handle->data; 3070 struct perf_mmap_data *data = handle->data;
@@ -2993,7 +3081,6 @@ int perf_output_begin(struct perf_output_handle *handle,
2993 struct perf_event *event, unsigned int size, 3081 struct perf_event *event, unsigned int size,
2994 int nmi, int sample) 3082 int nmi, int sample)
2995{ 3083{
2996 struct perf_event *output_event;
2997 struct perf_mmap_data *data; 3084 struct perf_mmap_data *data;
2998 unsigned long tail, offset, head; 3085 unsigned long tail, offset, head;
2999 int have_lost; 3086 int have_lost;
@@ -3010,10 +3097,6 @@ int perf_output_begin(struct perf_output_handle *handle,
3010 if (event->parent) 3097 if (event->parent)
3011 event = event->parent; 3098 event = event->parent;
3012 3099
3013 output_event = rcu_dereference(event->output);
3014 if (output_event)
3015 event = output_event;
3016
3017 data = rcu_dereference(event->data); 3100 data = rcu_dereference(event->data);
3018 if (!data) 3101 if (!data)
3019 goto out; 3102 goto out;
@@ -3972,13 +4055,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3972 } 4055 }
3973} 4056}
3974 4057
3975static void perf_swevent_unthrottle(struct perf_event *event)
3976{
3977 /*
3978 * Nothing to do, we already reset hwc->interrupts.
3979 */
3980}
3981
3982static void perf_swevent_add(struct perf_event *event, u64 nr, 4058static void perf_swevent_add(struct perf_event *event, u64 nr,
3983 int nmi, struct perf_sample_data *data, 4059 int nmi, struct perf_sample_data *data,
3984 struct pt_regs *regs) 4060 struct pt_regs *regs)
@@ -4193,11 +4269,22 @@ static void perf_swevent_disable(struct perf_event *event)
4193 hlist_del_rcu(&event->hlist_entry); 4269 hlist_del_rcu(&event->hlist_entry);
4194} 4270}
4195 4271
4272static void perf_swevent_void(struct perf_event *event)
4273{
4274}
4275
4276static int perf_swevent_int(struct perf_event *event)
4277{
4278 return 0;
4279}
4280
4196static const struct pmu perf_ops_generic = { 4281static const struct pmu perf_ops_generic = {
4197 .enable = perf_swevent_enable, 4282 .enable = perf_swevent_enable,
4198 .disable = perf_swevent_disable, 4283 .disable = perf_swevent_disable,
4284 .start = perf_swevent_int,
4285 .stop = perf_swevent_void,
4199 .read = perf_swevent_read, 4286 .read = perf_swevent_read,
4200 .unthrottle = perf_swevent_unthrottle, 4287 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4201}; 4288};
4202 4289
4203/* 4290/*
@@ -4478,8 +4565,10 @@ static int swevent_hlist_get(struct perf_event *event)
4478static const struct pmu perf_ops_tracepoint = { 4565static const struct pmu perf_ops_tracepoint = {
4479 .enable = perf_trace_enable, 4566 .enable = perf_trace_enable,
4480 .disable = perf_trace_disable, 4567 .disable = perf_trace_disable,
4568 .start = perf_swevent_int,
4569 .stop = perf_swevent_void,
4481 .read = perf_swevent_read, 4570 .read = perf_swevent_read,
4482 .unthrottle = perf_swevent_unthrottle, 4571 .unthrottle = perf_swevent_void,
4483}; 4572};
4484 4573
4485static int perf_tp_filter_match(struct perf_event *event, 4574static int perf_tp_filter_match(struct perf_event *event,
@@ -4912,39 +5001,17 @@ err_size:
4912 goto out; 5001 goto out;
4913} 5002}
4914 5003
4915static int perf_event_set_output(struct perf_event *event, int output_fd) 5004static int
5005perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
4916{ 5006{
4917 struct perf_event *output_event = NULL; 5007 struct perf_mmap_data *data = NULL, *old_data = NULL;
4918 struct file *output_file = NULL;
4919 struct perf_event *old_output;
4920 int fput_needed = 0;
4921 int ret = -EINVAL; 5008 int ret = -EINVAL;
4922 5009
4923 /* 5010 if (!output_event)
4924 * Don't allow output of inherited per-task events. This would
4925 * create performance issues due to cross cpu access.
4926 */
4927 if (event->cpu == -1 && event->attr.inherit)
4928 return -EINVAL;
4929
4930 if (!output_fd)
4931 goto set; 5011 goto set;
4932 5012
4933 output_file = fget_light(output_fd, &fput_needed); 5013 /* don't allow circular references */
4934 if (!output_file) 5014 if (event == output_event)
4935 return -EBADF;
4936
4937 if (output_file->f_op != &perf_fops)
4938 goto out;
4939
4940 output_event = output_file->private_data;
4941
4942 /* Don't chain output fds */
4943 if (output_event->output)
4944 goto out;
4945
4946 /* Don't set an output fd when we already have an output channel */
4947 if (event->data)
4948 goto out; 5015 goto out;
4949 5016
4950 /* 5017 /*
@@ -4959,26 +5026,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
4959 if (output_event->cpu == -1 && output_event->ctx != event->ctx) 5026 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
4960 goto out; 5027 goto out;
4961 5028
4962 atomic_long_inc(&output_file->f_count);
4963
4964set: 5029set:
4965 mutex_lock(&event->mmap_mutex); 5030 mutex_lock(&event->mmap_mutex);
4966 old_output = event->output; 5031 /* Can't redirect output if we've got an active mmap() */
4967 rcu_assign_pointer(event->output, output_event); 5032 if (atomic_read(&event->mmap_count))
4968 mutex_unlock(&event->mmap_mutex); 5033 goto unlock;
4969 5034
4970 if (old_output) { 5035 if (output_event) {
4971 /* 5036 /* get the buffer we want to redirect to */
4972 * we need to make sure no existing perf_output_*() 5037 data = perf_mmap_data_get(output_event);
4973 * is still referencing this event. 5038 if (!data)
4974 */ 5039 goto unlock;
4975 synchronize_rcu();
4976 fput(old_output->filp);
4977 } 5040 }
4978 5041
5042 old_data = event->data;
5043 rcu_assign_pointer(event->data, data);
4979 ret = 0; 5044 ret = 0;
5045unlock:
5046 mutex_unlock(&event->mmap_mutex);
5047
5048 if (old_data)
5049 perf_mmap_data_put(old_data);
4980out: 5050out:
4981 fput_light(output_file, fput_needed);
4982 return ret; 5051 return ret;
4983} 5052}
4984 5053
@@ -4994,7 +5063,7 @@ SYSCALL_DEFINE5(perf_event_open,
4994 struct perf_event_attr __user *, attr_uptr, 5063 struct perf_event_attr __user *, attr_uptr,
4995 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5064 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
4996{ 5065{
4997 struct perf_event *event, *group_leader; 5066 struct perf_event *event, *group_leader = NULL, *output_event = NULL;
4998 struct perf_event_attr attr; 5067 struct perf_event_attr attr;
4999 struct perf_event_context *ctx; 5068 struct perf_event_context *ctx;
5000 struct file *event_file = NULL; 5069 struct file *event_file = NULL;
@@ -5034,19 +5103,25 @@ SYSCALL_DEFINE5(perf_event_open,
5034 goto err_fd; 5103 goto err_fd;
5035 } 5104 }
5036 5105
5106 if (group_fd != -1) {
5107 group_leader = perf_fget_light(group_fd, &fput_needed);
5108 if (IS_ERR(group_leader)) {
5109 err = PTR_ERR(group_leader);
5110 goto err_put_context;
5111 }
5112 group_file = group_leader->filp;
5113 if (flags & PERF_FLAG_FD_OUTPUT)
5114 output_event = group_leader;
5115 if (flags & PERF_FLAG_FD_NO_GROUP)
5116 group_leader = NULL;
5117 }
5118
5037 /* 5119 /*
5038 * Look up the group leader (we will attach this event to it): 5120 * Look up the group leader (we will attach this event to it):
5039 */ 5121 */
5040 group_leader = NULL; 5122 if (group_leader) {
5041 if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
5042 err = -EINVAL; 5123 err = -EINVAL;
5043 group_file = fget_light(group_fd, &fput_needed);
5044 if (!group_file)
5045 goto err_put_context;
5046 if (group_file->f_op != &perf_fops)
5047 goto err_put_context;
5048 5124
5049 group_leader = group_file->private_data;
5050 /* 5125 /*
5051 * Do not allow a recursive hierarchy (this new sibling 5126 * Do not allow a recursive hierarchy (this new sibling
5052 * becoming part of another group-sibling): 5127 * becoming part of another group-sibling):
@@ -5068,9 +5143,16 @@ SYSCALL_DEFINE5(perf_event_open,
5068 5143
5069 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 5144 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5070 NULL, NULL, GFP_KERNEL); 5145 NULL, NULL, GFP_KERNEL);
5071 err = PTR_ERR(event); 5146 if (IS_ERR(event)) {
5072 if (IS_ERR(event)) 5147 err = PTR_ERR(event);
5073 goto err_put_context; 5148 goto err_put_context;
5149 }
5150
5151 if (output_event) {
5152 err = perf_event_set_output(event, output_event);
5153 if (err)
5154 goto err_free_put_context;
5155 }
5074 5156
5075 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); 5157 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5076 if (IS_ERR(event_file)) { 5158 if (IS_ERR(event_file)) {
@@ -5078,12 +5160,6 @@ SYSCALL_DEFINE5(perf_event_open,
5078 goto err_free_put_context; 5160 goto err_free_put_context;
5079 } 5161 }
5080 5162
5081 if (flags & PERF_FLAG_FD_OUTPUT) {
5082 err = perf_event_set_output(event, group_fd);
5083 if (err)
5084 goto err_fput_free_put_context;
5085 }
5086
5087 event->filp = event_file; 5163 event->filp = event_file;
5088 WARN_ON_ONCE(ctx->parent_ctx); 5164 WARN_ON_ONCE(ctx->parent_ctx);
5089 mutex_lock(&ctx->mutex); 5165 mutex_lock(&ctx->mutex);
@@ -5097,12 +5173,16 @@ SYSCALL_DEFINE5(perf_event_open,
5097 list_add_tail(&event->owner_entry, &current->perf_event_list); 5173 list_add_tail(&event->owner_entry, &current->perf_event_list);
5098 mutex_unlock(&current->perf_event_mutex); 5174 mutex_unlock(&current->perf_event_mutex);
5099 5175
5176 /*
5177 * Drop the reference on the group_event after placing the
5178 * new event on the sibling_list. This ensures destruction
5179 * of the group leader will find the pointer to itself in
5180 * perf_group_detach().
5181 */
5100 fput_light(group_file, fput_needed); 5182 fput_light(group_file, fput_needed);
5101 fd_install(event_fd, event_file); 5183 fd_install(event_fd, event_file);
5102 return event_fd; 5184 return event_fd;
5103 5185
5104err_fput_free_put_context:
5105 fput(event_file);
5106err_free_put_context: 5186err_free_put_context:
5107 free_event(event); 5187 free_event(event);
5108err_put_context: 5188err_put_context:
@@ -5420,6 +5500,7 @@ static void perf_free_event(struct perf_event *event,
5420 5500
5421 fput(parent->filp); 5501 fput(parent->filp);
5422 5502
5503 perf_group_detach(event);
5423 list_del_event(event, ctx); 5504 list_del_event(event, ctx);
5424 free_event(event); 5505 free_event(event);
5425} 5506}
diff --git a/kernel/sched.c b/kernel/sched.c
index d48408142503..f8b8996228dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -544,6 +544,8 @@ struct rq {
544 struct root_domain *rd; 544 struct root_domain *rd;
545 struct sched_domain *sd; 545 struct sched_domain *sd;
546 546
547 unsigned long cpu_power;
548
547 unsigned char idle_at_tick; 549 unsigned char idle_at_tick;
548 /* For active balancing */ 550 /* For active balancing */
549 int post_schedule; 551 int post_schedule;
@@ -1499,24 +1501,9 @@ static unsigned long target_load(int cpu, int type)
1499 return max(rq->cpu_load[type-1], total); 1501 return max(rq->cpu_load[type-1], total);
1500} 1502}
1501 1503
1502static struct sched_group *group_of(int cpu)
1503{
1504 struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
1505
1506 if (!sd)
1507 return NULL;
1508
1509 return sd->groups;
1510}
1511
1512static unsigned long power_of(int cpu) 1504static unsigned long power_of(int cpu)
1513{ 1505{
1514 struct sched_group *group = group_of(cpu); 1506 return cpu_rq(cpu)->cpu_power;
1515
1516 if (!group)
1517 return SCHED_LOAD_SCALE;
1518
1519 return group->cpu_power;
1520} 1507}
1521 1508
1522static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); 1509static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
1854static void set_load_weight(struct task_struct *p) 1841static void set_load_weight(struct task_struct *p)
1855{ 1842{
1856 if (task_has_rt_policy(p)) { 1843 if (task_has_rt_policy(p)) {
1857 p->se.load.weight = prio_to_weight[0] * 2; 1844 p->se.load.weight = 0;
1858 p->se.load.inv_weight = prio_to_wmult[0] >> 1; 1845 p->se.load.inv_weight = WMULT_CONST;
1859 return; 1846 return;
1860 } 1847 }
1861 1848
@@ -7605,6 +7592,7 @@ void __init sched_init(void)
7605#ifdef CONFIG_SMP 7592#ifdef CONFIG_SMP
7606 rq->sd = NULL; 7593 rq->sd = NULL;
7607 rq->rd = NULL; 7594 rq->rd = NULL;
7595 rq->cpu_power = SCHED_LOAD_SCALE;
7608 rq->post_schedule = 0; 7596 rq->post_schedule = 0;
7609 rq->active_balance = 0; 7597 rq->active_balance = 0;
7610 rq->next_balance = jiffies; 7598 rq->next_balance = jiffies;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 217e4a9393e4..eed35eded602 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1225,7 +1225,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1225 unsigned long this_load, load; 1225 unsigned long this_load, load;
1226 int idx, this_cpu, prev_cpu; 1226 int idx, this_cpu, prev_cpu;
1227 unsigned long tl_per_task; 1227 unsigned long tl_per_task;
1228 unsigned int imbalance;
1229 struct task_group *tg; 1228 struct task_group *tg;
1230 unsigned long weight; 1229 unsigned long weight;
1231 int balanced; 1230 int balanced;
@@ -1252,8 +1251,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1252 tg = task_group(p); 1251 tg = task_group(p);
1253 weight = p->se.load.weight; 1252 weight = p->se.load.weight;
1254 1253
1255 imbalance = 100 + (sd->imbalance_pct - 100) / 2;
1256
1257 /* 1254 /*
1258 * In low-load situations, where prev_cpu is idle and this_cpu is idle 1255 * In low-load situations, where prev_cpu is idle and this_cpu is idle
1259 * due to the sync cause above having dropped this_load to 0, we'll 1256 * due to the sync cause above having dropped this_load to 0, we'll
@@ -1263,9 +1260,21 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1263 * Otherwise check if either cpus are near enough in load to allow this 1260 * Otherwise check if either cpus are near enough in load to allow this
1264 * task to be woken on this_cpu. 1261 * task to be woken on this_cpu.
1265 */ 1262 */
1266 balanced = !this_load || 1263 if (this_load) {
1267 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= 1264 unsigned long this_eff_load, prev_eff_load;
1268 imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); 1265
1266 this_eff_load = 100;
1267 this_eff_load *= power_of(prev_cpu);
1268 this_eff_load *= this_load +
1269 effective_load(tg, this_cpu, weight, weight);
1270
1271 prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
1272 prev_eff_load *= power_of(this_cpu);
1273 prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
1274
1275 balanced = this_eff_load <= prev_eff_load;
1276 } else
1277 balanced = true;
1269 1278
1270 /* 1279 /*
1271 * If the currently running task will sleep within 1280 * If the currently running task will sleep within
@@ -2298,6 +2307,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
2298 if (!power) 2307 if (!power)
2299 power = 1; 2308 power = 1;
2300 2309
2310 cpu_rq(cpu)->cpu_power = power;
2301 sdg->cpu_power = power; 2311 sdg->cpu_power = power;
2302} 2312}
2303 2313
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 825e1126008f..07b4f1b1a73a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -850,7 +850,7 @@ static __init int spawn_ksoftirqd(void)
850 void *cpu = (void *)(long)smp_processor_id(); 850 void *cpu = (void *)(long)smp_processor_id();
851 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 851 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
852 852
853 BUG_ON(err == NOTIFY_BAD); 853 BUG_ON(err != NOTIFY_OK);
854 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 854 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
855 register_cpu_notifier(&cpu_nfb); 855 register_cpu_notifier(&cpu_nfb);
856 return 0; 856 return 0;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b4e7431e7c78..70f8d90331e9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -321,7 +321,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
321 321
322#ifdef CONFIG_HOTPLUG_CPU 322#ifdef CONFIG_HOTPLUG_CPU
323 case CPU_UP_CANCELED: 323 case CPU_UP_CANCELED:
324 case CPU_DEAD: 324 case CPU_POST_DEAD:
325 { 325 {
326 struct cpu_stop_work *work; 326 struct cpu_stop_work *work;
327 327
diff --git a/kernel/timer.c b/kernel/timer.c
index 2454172a80d3..ee305c8d4e18 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1717,7 +1717,7 @@ void __init init_timers(void)
1717 1717
1718 init_timer_stats(); 1718 init_timer_stats();
1719 1719
1720 BUG_ON(err == NOTIFY_BAD); 1720 BUG_ON(err != NOTIFY_OK);
1721 register_cpu_notifier(&timers_nb); 1721 register_cpu_notifier(&timers_nb);
1722 open_softirq(TIMER_SOFTIRQ, run_timer_softirq); 1722 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1723} 1723}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 36ea2b65dcdc..638711c17504 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -842,6 +842,7 @@ static void blk_add_trace_split(void *ignore,
842 842
843/** 843/**
844 * blk_add_trace_remap - Add a trace for a remap operation 844 * blk_add_trace_remap - Add a trace for a remap operation
845 * @ignore: trace callback data parameter (not used)
845 * @q: queue the io is for 846 * @q: queue the io is for
846 * @bio: the source bio 847 * @bio: the source bio
847 * @dev: target device 848 * @dev: target device
@@ -873,6 +874,7 @@ static void blk_add_trace_remap(void *ignore,
873 874
874/** 875/**
875 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 876 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
877 * @ignore: trace callback data parameter (not used)
876 * @q: queue the io is for 878 * @q: queue the io is for
877 * @rq: the source request 879 * @rq: the source request
878 * @dev: target device 880 * @dev: target device
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index cb6f365016e4..e6f65887842c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -116,7 +116,7 @@ int perf_trace_enable(struct perf_event *p_event)
116 if (WARN_ON_ONCE(!list)) 116 if (WARN_ON_ONCE(!list))
117 return -EINVAL; 117 return -EINVAL;
118 118
119 list = per_cpu_ptr(list, smp_processor_id()); 119 list = this_cpu_ptr(list);
120 hlist_add_head_rcu(&p_event->hlist_entry, list); 120 hlist_add_head_rcu(&p_event->hlist_entry, list);
121 121
122 return 0; 122 return 0;
@@ -132,8 +132,9 @@ void perf_trace_destroy(struct perf_event *p_event)
132 struct ftrace_event_call *tp_event = p_event->tp_event; 132 struct ftrace_event_call *tp_event = p_event->tp_event;
133 int i; 133 int i;
134 134
135 mutex_lock(&event_mutex);
135 if (--tp_event->perf_refcount > 0) 136 if (--tp_event->perf_refcount > 0)
136 return; 137 goto out;
137 138
138 if (tp_event->class->reg) 139 if (tp_event->class->reg)
139 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); 140 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
@@ -142,6 +143,12 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->class->perf_probe, 143 tp_event->class->perf_probe,
143 tp_event); 144 tp_event);
144 145
146 /*
147 * Ensure our callback won't be called anymore. See
148 * tracepoint_probe_unregister() and __DO_TRACE().
149 */
150 synchronize_sched();
151
145 free_percpu(tp_event->perf_events); 152 free_percpu(tp_event->perf_events);
146 tp_event->perf_events = NULL; 153 tp_event->perf_events = NULL;
147 154
@@ -151,6 +158,8 @@ void perf_trace_destroy(struct perf_event *p_event)
151 perf_trace_buf[i] = NULL; 158 perf_trace_buf[i] = NULL;
152 } 159 }
153 } 160 }
161out:
162 mutex_unlock(&event_mutex);
154} 163}
155 164
156__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 165__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -169,7 +178,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
169 if (*rctxp < 0) 178 if (*rctxp < 0)
170 return NULL; 179 return NULL;
171 180
172 raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); 181 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
173 182
174 /* zero the dead bytes from align to not leak stack to user */ 183 /* zero the dead bytes from align to not leak stack to user */
175 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); 184 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index faf7cefd15da..f52b5f50299d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1359,7 +1359,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1359 for (i = 0; i < tp->nr_args; i++) 1359 for (i = 0; i < tp->nr_args; i++)
1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1361 1361
1362 head = per_cpu_ptr(call->perf_events, smp_processor_id()); 1362 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1364} 1364}
1365 1365
@@ -1392,7 +1392,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1392 for (i = 0; i < tp->nr_args; i++) 1392 for (i = 0; i < tp->nr_args; i++)
1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1394 1394
1395 head = per_cpu_ptr(call->perf_events, smp_processor_id()); 1395 head = this_cpu_ptr(call->perf_events);
1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1397} 1397}
1398 1398
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d2c859cec9ea..34e35804304b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -519,7 +519,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
519 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 519 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
520 (unsigned long *)&rec->args); 520 (unsigned long *)&rec->args);
521 521
522 head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); 522 head = this_cpu_ptr(sys_data->enter_event->perf_events);
523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
524} 524}
525 525
@@ -595,7 +595,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
595 rec->nr = syscall_nr; 595 rec->nr = syscall_nr;
596 rec->ret = syscall_get_return_value(current, regs); 596 rec->ret = syscall_get_return_value(current, regs);
597 597
598 head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); 598 head = this_cpu_ptr(sys_data->exit_event->perf_events);
599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
600} 600}
601 601