author      Ingo Molnar <mingo@elte.hu>    2010-03-04 05:47:50 -0500
committer   Ingo Molnar <mingo@elte.hu>    2010-03-04 05:47:52 -0500
commit      4f16d4e0c9a4b20d9f0db365587b96d6001efd7d
tree        fa25dcf285b26f1fac2bf267d0d1cd2c4eba90b8  /kernel
parent      1e259e0a9982078896f3404240096cbea01daca4
parent      6630125419ef37ff8781713c5e9d416f2a4ba357
Merge branch 'perf/core' into perf/urgent
Merge reason: Switch from pre-merge topical split to the post-merge urgent track
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/futex.c                      |  30
-rw-r--r--  kernel/hw_breakpoint.c              |  10
-rw-r--r--  kernel/kfifo.c                      |   3
-rw-r--r--  kernel/kgdb.c                       |   6
-rw-r--r--  kernel/kprobes.c                    |  34
-rw-r--r--  kernel/perf_event.c                 | 642
-rw-r--r--  kernel/sched.c                      |  12
-rw-r--r--  kernel/softirq.c                    |  15
-rw-r--r--  kernel/softlockup.c                 |  15
-rw-r--r--  kernel/sys.c                        |   2
-rw-r--r--  kernel/time/timekeeping.c           |   2
-rw-r--r--  kernel/trace/Makefile               |   4
-rw-r--r--  kernel/trace/ftrace.c               |  54
-rw-r--r--  kernel/trace/trace_event_profile.c  |  52
-rw-r--r--  kernel/trace/trace_events_filter.c  |   4
-rw-r--r--  kernel/trace/trace_kprobe.c         | 198
-rw-r--r--  kernel/trace/trace_stack.c          |  24
-rw-r--r--  kernel/trace/trace_syscalls.c       |  76
18 files changed, 666 insertions, 517 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index d9b3a2228f9d..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
530 | return -EINVAL; | 530 | return -EINVAL; |
531 | 531 | ||
532 | WARN_ON(!atomic_read(&pi_state->refcount)); | 532 | WARN_ON(!atomic_read(&pi_state->refcount)); |
533 | WARN_ON(pid && pi_state->owner && | 533 | |
534 | pi_state->owner->pid != pid); | 534 | /* |
535 | * When pi_state->owner is NULL then the owner died | ||
536 | * and another waiter is on the fly. pi_state->owner | ||
537 | * is fixed up by the task which acquires | ||
538 | * pi_state->rt_mutex. | ||
539 | * | ||
540 | * We do not check for pid == 0 which can happen when | ||
541 | * the owner died and robust_list_exit() cleared the | ||
542 | * TID. | ||
543 | */ | ||
544 | if (pid && pi_state->owner) { | ||
545 | /* | ||
546 | * Bail out if user space manipulated the | ||
547 | * futex value. | ||
548 | */ | ||
549 | if (pid != task_pid_vnr(pi_state->owner)) | ||
550 | return -EINVAL; | ||
551 | } | ||
535 | 552 | ||
536 | atomic_inc(&pi_state->refcount); | 553 | atomic_inc(&pi_state->refcount); |
537 | *ps = pi_state; | 554 | *ps = pi_state; |
@@ -758,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
758 | if (!pi_state) | 775 | if (!pi_state) |
759 | return -EINVAL; | 776 | return -EINVAL; |
760 | 777 | ||
778 | /* | ||
779 | * If current does not own the pi_state then the futex is | ||
780 | * inconsistent and user space fiddled with the futex value. | ||
781 | */ | ||
782 | if (pi_state->owner != current) | ||
783 | return -EINVAL; | ||
784 | |||
761 | raw_spin_lock(&pi_state->pi_mutex.wait_lock); | 785 | raw_spin_lock(&pi_state->pi_mutex.wait_lock); |
762 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | 786 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); |
763 | 787 | ||
@@ -1971,7 +1995,7 @@ retry_private: | |||
1971 | /* Unqueue and drop the lock */ | 1995 | /* Unqueue and drop the lock */ |
1972 | unqueue_me_pi(&q); | 1996 | unqueue_me_pi(&q); |
1973 | 1997 | ||
1974 | goto out; | 1998 | goto out_put_key; |
1975 | 1999 | ||
1976 | out_unlock_put_key: | 2000 | out_unlock_put_key: |
1977 | queue_unlock(&q, hb); | 2001 | queue_unlock(&q, hb); |
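
The hunks above replace a WARN_ON() with a hard -EINVAL when the TID stored in the futex word does not match the recorded pi_state owner. Below is a standalone user-space sketch of that consistency check, using the PI-futex word layout from <linux/futex.h>; futex_word_consistent() and owner_tid are illustrative names, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* PI futex word layout (see <linux/futex.h>) */
#define FUTEX_WAITERS		0x80000000u
#define FUTEX_OWNER_DIED	0x40000000u
#define FUTEX_TID_MASK		0x3fffffffu

/*
 * Mirror of the check lookup_pi_state() now performs: the TID encoded
 * in the futex word must match the owner the kernel recorded, unless
 * the owner died and the TID was cleared by robust_list_exit().
 */
static int futex_word_consistent(uint32_t uval, uint32_t owner_tid)
{
	uint32_t pid = uval & FUTEX_TID_MASK;

	if (!pid || !owner_tid)
		return 1;	/* owner died / TID cleared: nothing to compare */
	return pid == owner_tid;
}

int main(void)
{
	printf("%d\n", futex_word_consistent(FUTEX_WAITERS | 1234, 1234)); /* 1 */
	printf("%d\n", futex_word_consistent(FUTEX_WAITERS | 1234, 4321)); /* 0: user space fiddled with the value */
	return 0;
}
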
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 4d99512ee149..03808ed342a6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); | |||
413 | * | 413 | * |
414 | * @return a set of per_cpu pointers to perf events | 414 | * @return a set of per_cpu pointers to perf events |
415 | */ | 415 | */ |
416 | struct perf_event ** | 416 | struct perf_event * __percpu * |
417 | register_wide_hw_breakpoint(struct perf_event_attr *attr, | 417 | register_wide_hw_breakpoint(struct perf_event_attr *attr, |
418 | perf_overflow_handler_t triggered) | 418 | perf_overflow_handler_t triggered) |
419 | { | 419 | { |
420 | struct perf_event **cpu_events, **pevent, *bp; | 420 | struct perf_event * __percpu *cpu_events, **pevent, *bp; |
421 | long err; | 421 | long err; |
422 | int cpu; | 422 | int cpu; |
423 | 423 | ||
424 | cpu_events = alloc_percpu(typeof(*cpu_events)); | 424 | cpu_events = alloc_percpu(typeof(*cpu_events)); |
425 | if (!cpu_events) | 425 | if (!cpu_events) |
426 | return ERR_PTR(-ENOMEM); | 426 | return (void __percpu __force *)ERR_PTR(-ENOMEM); |
427 | 427 | ||
428 | get_online_cpus(); | 428 | get_online_cpus(); |
429 | for_each_online_cpu(cpu) { | 429 | for_each_online_cpu(cpu) { |
@@ -451,7 +451,7 @@ fail: | |||
451 | put_online_cpus(); | 451 | put_online_cpus(); |
452 | 452 | ||
453 | free_percpu(cpu_events); | 453 | free_percpu(cpu_events); |
454 | return ERR_PTR(err); | 454 | return (void __percpu __force *)ERR_PTR(err); |
455 | } | 455 | } |
456 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); | 456 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); |
457 | 457 | ||
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); | |||
459 | * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel | 459 | * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel |
460 | * @cpu_events: the per cpu set of events to unregister | 460 | * @cpu_events: the per cpu set of events to unregister |
461 | */ | 461 | */ |
462 | void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) | 462 | void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) |
463 | { | 463 | { |
464 | int cpu; | 464 | int cpu; |
465 | struct perf_event **pevent; | 465 | struct perf_event **pevent; |
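
The hunks above only add sparse __percpu annotations, but they dictate how callers treat the return value: it is a per-cpu pointer that may carry an ERR_PTR(), so it has to be cast back with __force before IS_ERR()/PTR_ERR(). A hedged module sketch modeled on samples/hw_breakpoint/data_breakpoint.c, assuming the 2.6.33-era two-argument register_wide_hw_breakpoint() shown above; the probed symbol and all names are illustrative.

#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/err.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event * __percpu *wide_bp;

static void wide_bp_handler(struct perf_event *bp, int nmi,
			    struct perf_sample_data *data,
			    struct pt_regs *regs)
{
	printk(KERN_INFO "write to pid_max detected\n");
}

static int __init wide_bp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("pid_max");	/* illustrative target */
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	wide_bp = register_wide_hw_breakpoint(&attr, wide_bp_handler);
	/* The __percpu return value is stripped with __force for IS_ERR() */
	if (IS_ERR((void __force *)wide_bp))
		return PTR_ERR((void __force *)wide_bp);
	return 0;
}

static void __exit wide_bp_exit(void)
{
	unregister_wide_hw_breakpoint(wide_bp);
}

module_init(wide_bp_init);
module_exit(wide_bp_exit);
MODULE_LICENSE("GPL");
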
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 498cabba225e..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) | |||
80 | 80 | ||
81 | buffer = kmalloc(size, gfp_mask); | 81 | buffer = kmalloc(size, gfp_mask); |
82 | if (!buffer) { | 82 | if (!buffer) { |
83 | _kfifo_init(fifo, 0, 0); | 83 | _kfifo_init(fifo, NULL, 0); |
84 | return -ENOMEM; | 84 | return -ENOMEM; |
85 | } | 85 | } |
86 | 86 | ||
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc); | |||
97 | void kfifo_free(struct kfifo *fifo) | 97 | void kfifo_free(struct kfifo *fifo) |
98 | { | 98 | { |
99 | kfree(fifo->buffer); | 99 | kfree(fifo->buffer); |
100 | _kfifo_init(fifo, NULL, 0); | ||
100 | } | 101 | } |
101 | EXPORT_SYMBOL(kfifo_free); | 102 | EXPORT_SYMBOL(kfifo_free); |
102 | 103 | ||
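
The extra _kfifo_init(fifo, NULL, 0) means a freed fifo is left in a defined empty state rather than still pointing at the freed buffer. A minimal sketch of the 2.6.33-era kfifo API touched above, for illustration only; the module boilerplate and names are not from the patch.

#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/slab.h>

static struct kfifo fifo;

static int __init kfifo_sketch_init(void)
{
	unsigned char buf[4] = { 1, 2, 3, 4 };
	int ret;

	ret = kfifo_alloc(&fifo, 128, GFP_KERNEL);	/* size is a power of two */
	if (ret)
		return ret;

	kfifo_in(&fifo, buf, sizeof(buf));
	pr_info("queued %u bytes\n", kfifo_len(&fifo));

	kfifo_free(&fifo);
	pr_info("after kfifo_free: %u bytes\n", kfifo_len(&fifo));	/* 0, not stale */
	return 0;
}

static void __exit kfifo_sketch_exit(void)
{
}

module_init(kfifo_sketch_init);
module_exit(kfifo_sketch_exit);
MODULE_LICENSE("GPL");
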
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index c7ade62e4ef0..761fdd2b3034 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -599,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs) | |||
599 | 599 | ||
600 | /* Signal the primary CPU that we are done: */ | 600 | /* Signal the primary CPU that we are done: */ |
601 | atomic_set(&cpu_in_kgdb[cpu], 0); | 601 | atomic_set(&cpu_in_kgdb[cpu], 0); |
602 | touch_softlockup_watchdog(); | 602 | touch_softlockup_watchdog_sync(); |
603 | clocksource_touch_watchdog(); | 603 | clocksource_touch_watchdog(); |
604 | local_irq_restore(flags); | 604 | local_irq_restore(flags); |
605 | } | 605 | } |
@@ -1453,7 +1453,7 @@ acquirelock: | |||
1453 | (kgdb_info[cpu].task && | 1453 | (kgdb_info[cpu].task && |
1454 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { | 1454 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { |
1455 | atomic_set(&kgdb_active, -1); | 1455 | atomic_set(&kgdb_active, -1); |
1456 | touch_softlockup_watchdog(); | 1456 | touch_softlockup_watchdog_sync(); |
1457 | clocksource_touch_watchdog(); | 1457 | clocksource_touch_watchdog(); |
1458 | local_irq_restore(flags); | 1458 | local_irq_restore(flags); |
1459 | 1459 | ||
@@ -1553,7 +1553,7 @@ kgdb_restore: | |||
1553 | } | 1553 | } |
1554 | /* Free kgdb_active */ | 1554 | /* Free kgdb_active */ |
1555 | atomic_set(&kgdb_active, -1); | 1555 | atomic_set(&kgdb_active, -1); |
1556 | touch_softlockup_watchdog(); | 1556 | touch_softlockup_watchdog_sync(); |
1557 | clocksource_touch_watchdog(); | 1557 | clocksource_touch_watchdog(); |
1558 | local_irq_restore(flags); | 1558 | local_irq_restore(flags); |
1559 | 1559 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b7df302a0204..ccec774c716d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/debugfs.h> | 44 | #include <linux/debugfs.h> |
45 | #include <linux/kdebug.h> | 45 | #include <linux/kdebug.h> |
46 | #include <linux/memory.h> | 46 | #include <linux/memory.h> |
47 | #include <linux/ftrace.h> | ||
47 | 48 | ||
48 | #include <asm-generic/sections.h> | 49 | #include <asm-generic/sections.h> |
49 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
@@ -93,6 +94,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = { | |||
93 | {"native_get_debugreg",}, | 94 | {"native_get_debugreg",}, |
94 | {"irq_entries_start",}, | 95 | {"irq_entries_start",}, |
95 | {"common_interrupt",}, | 96 | {"common_interrupt",}, |
97 | {"mcount",}, /* mcount can be called from everywhere */ | ||
96 | {NULL} /* Terminator */ | 98 | {NULL} /* Terminator */ |
97 | }; | 99 | }; |
98 | 100 | ||
@@ -124,30 +126,6 @@ static LIST_HEAD(kprobe_insn_pages); | |||
124 | static int kprobe_garbage_slots; | 126 | static int kprobe_garbage_slots; |
125 | static int collect_garbage_slots(void); | 127 | static int collect_garbage_slots(void); |
126 | 128 | ||
127 | static int __kprobes check_safety(void) | ||
128 | { | ||
129 | int ret = 0; | ||
130 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) | ||
131 | ret = freeze_processes(); | ||
132 | if (ret == 0) { | ||
133 | struct task_struct *p, *q; | ||
134 | do_each_thread(p, q) { | ||
135 | if (p != current && p->state == TASK_RUNNING && | ||
136 | p->pid != 0) { | ||
137 | printk("Check failed: %s is running\n",p->comm); | ||
138 | ret = -1; | ||
139 | goto loop_end; | ||
140 | } | ||
141 | } while_each_thread(p, q); | ||
142 | } | ||
143 | loop_end: | ||
144 | thaw_processes(); | ||
145 | #else | ||
146 | synchronize_sched(); | ||
147 | #endif | ||
148 | return ret; | ||
149 | } | ||
150 | |||
151 | /** | 129 | /** |
152 | * __get_insn_slot() - Find a slot on an executable page for an instruction. | 130 | * __get_insn_slot() - Find a slot on an executable page for an instruction. |
153 | * We allocate an executable page if there's no room on existing ones. | 131 | * We allocate an executable page if there's no room on existing ones. |
@@ -235,9 +213,8 @@ static int __kprobes collect_garbage_slots(void) | |||
235 | { | 213 | { |
236 | struct kprobe_insn_page *kip, *next; | 214 | struct kprobe_insn_page *kip, *next; |
237 | 215 | ||
238 | /* Ensure no-one is preepmted on the garbages */ | 216 | /* Ensure no-one is interrupted on the garbages */ |
239 | if (check_safety()) | 217 | synchronize_sched(); |
240 | return -EAGAIN; | ||
241 | 218 | ||
242 | list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) { | 219 | list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) { |
243 | int i; | 220 | int i; |
@@ -728,7 +705,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
728 | 705 | ||
729 | preempt_disable(); | 706 | preempt_disable(); |
730 | if (!kernel_text_address((unsigned long) p->addr) || | 707 | if (!kernel_text_address((unsigned long) p->addr) || |
731 | in_kprobes_functions((unsigned long) p->addr)) { | 708 | in_kprobes_functions((unsigned long) p->addr) || |
709 | ftrace_text_reserved(p->addr, p->addr)) { | ||
732 | preempt_enable(); | 710 | preempt_enable(); |
733 | return -EINVAL; | 711 | return -EINVAL; |
734 | } | 712 | } |
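
With mcount blacklisted and the ftrace_text_reserved() check added above, register_kprobe() now refuses addresses inside ftrace's reserved text with -EINVAL. A short sketch in the style of samples/kprobes/kprobe_example.c; the probed symbol and handler names are illustrative.

#include <linux/module.h>
#include <linux/kprobes.h>

static int sketch_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe kp = {
	.symbol_name	= "do_fork",	/* illustrative target */
	.pre_handler	= sketch_pre_handler,
};

static int __init kp_sketch_init(void)
{
	/* Fails with -EINVAL if the address is blacklisted or ftrace-reserved */
	int ret = register_kprobe(&kp);

	if (ret < 0)
		pr_err("register_kprobe failed: %d\n", ret);
	return ret;
}

static void __exit kp_sketch_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kp_sketch_init);
module_exit(kp_sketch_exit);
MODULE_LICENSE("GPL");
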
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2ae7409bf38f..482d5e1d3764 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -56,21 +56,6 @@ static atomic_t nr_task_events __read_mostly; | |||
56 | */ | 56 | */ |
57 | int sysctl_perf_event_paranoid __read_mostly = 1; | 57 | int sysctl_perf_event_paranoid __read_mostly = 1; |
58 | 58 | ||
59 | static inline bool perf_paranoid_tracepoint_raw(void) | ||
60 | { | ||
61 | return sysctl_perf_event_paranoid > -1; | ||
62 | } | ||
63 | |||
64 | static inline bool perf_paranoid_cpu(void) | ||
65 | { | ||
66 | return sysctl_perf_event_paranoid > 0; | ||
67 | } | ||
68 | |||
69 | static inline bool perf_paranoid_kernel(void) | ||
70 | { | ||
71 | return sysctl_perf_event_paranoid > 1; | ||
72 | } | ||
73 | |||
74 | int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ | 59 | int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ |
75 | 60 | ||
76 | /* | 61 | /* |
@@ -98,11 +83,12 @@ void __weak hw_perf_enable(void) { barrier(); } | |||
98 | 83 | ||
99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } | 84 | void __weak hw_perf_event_setup(int cpu) { barrier(); } |
100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } | 85 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } |
86 | void __weak hw_perf_event_setup_offline(int cpu) { barrier(); } | ||
101 | 87 | ||
102 | int __weak | 88 | int __weak |
103 | hw_perf_group_sched_in(struct perf_event *group_leader, | 89 | hw_perf_group_sched_in(struct perf_event *group_leader, |
104 | struct perf_cpu_context *cpuctx, | 90 | struct perf_cpu_context *cpuctx, |
105 | struct perf_event_context *ctx, int cpu) | 91 | struct perf_event_context *ctx) |
106 | { | 92 | { |
107 | return 0; | 93 | return 0; |
108 | } | 94 | } |
@@ -248,7 +234,7 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
248 | 234 | ||
249 | static inline u64 perf_clock(void) | 235 | static inline u64 perf_clock(void) |
250 | { | 236 | { |
251 | return cpu_clock(smp_processor_id()); | 237 | return cpu_clock(raw_smp_processor_id()); |
252 | } | 238 | } |
253 | 239 | ||
254 | /* | 240 | /* |
@@ -289,6 +275,15 @@ static void update_event_times(struct perf_event *event) | |||
289 | event->total_time_running = run_end - event->tstamp_running; | 275 | event->total_time_running = run_end - event->tstamp_running; |
290 | } | 276 | } |
291 | 277 | ||
278 | static struct list_head * | ||
279 | ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | ||
280 | { | ||
281 | if (event->attr.pinned) | ||
282 | return &ctx->pinned_groups; | ||
283 | else | ||
284 | return &ctx->flexible_groups; | ||
285 | } | ||
286 | |||
292 | /* | 287 | /* |
293 | * Add a event from the lists for its context. | 288 | * Add a event from the lists for its context. |
294 | * Must be called with ctx->mutex and ctx->lock held. | 289 | * Must be called with ctx->mutex and ctx->lock held. |
@@ -303,9 +298,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
303 | * add it straight to the context's event list, or to the group | 298 | * add it straight to the context's event list, or to the group |
304 | * leader's sibling list: | 299 | * leader's sibling list: |
305 | */ | 300 | */ |
306 | if (group_leader == event) | 301 | if (group_leader == event) { |
307 | list_add_tail(&event->group_entry, &ctx->group_list); | 302 | struct list_head *list; |
308 | else { | 303 | |
304 | if (is_software_event(event)) | ||
305 | event->group_flags |= PERF_GROUP_SOFTWARE; | ||
306 | |||
307 | list = ctx_group_list(event, ctx); | ||
308 | list_add_tail(&event->group_entry, list); | ||
309 | } else { | ||
310 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
311 | !is_software_event(event)) | ||
312 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
313 | |||
309 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 314 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
310 | group_leader->nr_siblings++; | 315 | group_leader->nr_siblings++; |
311 | } | 316 | } |
@@ -355,9 +360,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
355 | * to the context list directly: | 360 | * to the context list directly: |
356 | */ | 361 | */ |
357 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { | 362 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { |
363 | struct list_head *list; | ||
358 | 364 | ||
359 | list_move_tail(&sibling->group_entry, &ctx->group_list); | 365 | list = ctx_group_list(event, ctx); |
366 | list_move_tail(&sibling->group_entry, list); | ||
360 | sibling->group_leader = sibling; | 367 | sibling->group_leader = sibling; |
368 | |||
369 | /* Inherit group flags from the previous leader */ | ||
370 | sibling->group_flags = event->group_flags; | ||
361 | } | 371 | } |
362 | } | 372 | } |
363 | 373 | ||
@@ -608,14 +618,13 @@ void perf_event_disable(struct perf_event *event) | |||
608 | static int | 618 | static int |
609 | event_sched_in(struct perf_event *event, | 619 | event_sched_in(struct perf_event *event, |
610 | struct perf_cpu_context *cpuctx, | 620 | struct perf_cpu_context *cpuctx, |
611 | struct perf_event_context *ctx, | 621 | struct perf_event_context *ctx) |
612 | int cpu) | ||
613 | { | 622 | { |
614 | if (event->state <= PERF_EVENT_STATE_OFF) | 623 | if (event->state <= PERF_EVENT_STATE_OFF) |
615 | return 0; | 624 | return 0; |
616 | 625 | ||
617 | event->state = PERF_EVENT_STATE_ACTIVE; | 626 | event->state = PERF_EVENT_STATE_ACTIVE; |
618 | event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | 627 | event->oncpu = smp_processor_id(); |
619 | /* | 628 | /* |
620 | * The new state must be visible before we turn it on in the hardware: | 629 | * The new state must be visible before we turn it on in the hardware: |
621 | */ | 630 | */ |
@@ -642,8 +651,7 @@ event_sched_in(struct perf_event *event, | |||
642 | static int | 651 | static int |
643 | group_sched_in(struct perf_event *group_event, | 652 | group_sched_in(struct perf_event *group_event, |
644 | struct perf_cpu_context *cpuctx, | 653 | struct perf_cpu_context *cpuctx, |
645 | struct perf_event_context *ctx, | 654 | struct perf_event_context *ctx) |
646 | int cpu) | ||
647 | { | 655 | { |
648 | struct perf_event *event, *partial_group; | 656 | struct perf_event *event, *partial_group; |
649 | int ret; | 657 | int ret; |
@@ -651,18 +659,18 @@ group_sched_in(struct perf_event *group_event, | |||
651 | if (group_event->state == PERF_EVENT_STATE_OFF) | 659 | if (group_event->state == PERF_EVENT_STATE_OFF) |
652 | return 0; | 660 | return 0; |
653 | 661 | ||
654 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); | 662 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx); |
655 | if (ret) | 663 | if (ret) |
656 | return ret < 0 ? ret : 0; | 664 | return ret < 0 ? ret : 0; |
657 | 665 | ||
658 | if (event_sched_in(group_event, cpuctx, ctx, cpu)) | 666 | if (event_sched_in(group_event, cpuctx, ctx)) |
659 | return -EAGAIN; | 667 | return -EAGAIN; |
660 | 668 | ||
661 | /* | 669 | /* |
662 | * Schedule in siblings as one group (if any): | 670 | * Schedule in siblings as one group (if any): |
663 | */ | 671 | */ |
664 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 672 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
665 | if (event_sched_in(event, cpuctx, ctx, cpu)) { | 673 | if (event_sched_in(event, cpuctx, ctx)) { |
666 | partial_group = event; | 674 | partial_group = event; |
667 | goto group_error; | 675 | goto group_error; |
668 | } | 676 | } |
@@ -686,24 +694,6 @@ group_error: | |||
686 | } | 694 | } |
687 | 695 | ||
688 | /* | 696 | /* |
689 | * Return 1 for a group consisting entirely of software events, | ||
690 | * 0 if the group contains any hardware events. | ||
691 | */ | ||
692 | static int is_software_only_group(struct perf_event *leader) | ||
693 | { | ||
694 | struct perf_event *event; | ||
695 | |||
696 | if (!is_software_event(leader)) | ||
697 | return 0; | ||
698 | |||
699 | list_for_each_entry(event, &leader->sibling_list, group_entry) | ||
700 | if (!is_software_event(event)) | ||
701 | return 0; | ||
702 | |||
703 | return 1; | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * Work out whether we can put this event group on the CPU now. | 697 | * Work out whether we can put this event group on the CPU now. |
708 | */ | 698 | */ |
709 | static int group_can_go_on(struct perf_event *event, | 699 | static int group_can_go_on(struct perf_event *event, |
@@ -713,7 +703,7 @@ static int group_can_go_on(struct perf_event *event, | |||
713 | /* | 703 | /* |
714 | * Groups consisting entirely of software events can always go on. | 704 | * Groups consisting entirely of software events can always go on. |
715 | */ | 705 | */ |
716 | if (is_software_only_group(event)) | 706 | if (event->group_flags & PERF_GROUP_SOFTWARE) |
717 | return 1; | 707 | return 1; |
718 | /* | 708 | /* |
719 | * If an exclusive group is already on, no other hardware | 709 | * If an exclusive group is already on, no other hardware |
@@ -754,7 +744,6 @@ static void __perf_install_in_context(void *info) | |||
754 | struct perf_event *event = info; | 744 | struct perf_event *event = info; |
755 | struct perf_event_context *ctx = event->ctx; | 745 | struct perf_event_context *ctx = event->ctx; |
756 | struct perf_event *leader = event->group_leader; | 746 | struct perf_event *leader = event->group_leader; |
757 | int cpu = smp_processor_id(); | ||
758 | int err; | 747 | int err; |
759 | 748 | ||
760 | /* | 749 | /* |
@@ -801,7 +790,7 @@ static void __perf_install_in_context(void *info) | |||
801 | if (!group_can_go_on(event, cpuctx, 1)) | 790 | if (!group_can_go_on(event, cpuctx, 1)) |
802 | err = -EEXIST; | 791 | err = -EEXIST; |
803 | else | 792 | else |
804 | err = event_sched_in(event, cpuctx, ctx, cpu); | 793 | err = event_sched_in(event, cpuctx, ctx); |
805 | 794 | ||
806 | if (err) { | 795 | if (err) { |
807 | /* | 796 | /* |
@@ -943,11 +932,9 @@ static void __perf_event_enable(void *info) | |||
943 | } else { | 932 | } else { |
944 | perf_disable(); | 933 | perf_disable(); |
945 | if (event == leader) | 934 | if (event == leader) |
946 | err = group_sched_in(event, cpuctx, ctx, | 935 | err = group_sched_in(event, cpuctx, ctx); |
947 | smp_processor_id()); | ||
948 | else | 936 | else |
949 | err = event_sched_in(event, cpuctx, ctx, | 937 | err = event_sched_in(event, cpuctx, ctx); |
950 | smp_processor_id()); | ||
951 | perf_enable(); | 938 | perf_enable(); |
952 | } | 939 | } |
953 | 940 | ||
@@ -1043,8 +1030,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1043 | return 0; | 1030 | return 0; |
1044 | } | 1031 | } |
1045 | 1032 | ||
1046 | void __perf_event_sched_out(struct perf_event_context *ctx, | 1033 | enum event_type_t { |
1047 | struct perf_cpu_context *cpuctx) | 1034 | EVENT_FLEXIBLE = 0x1, |
1035 | EVENT_PINNED = 0x2, | ||
1036 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
1037 | }; | ||
1038 | |||
1039 | static void ctx_sched_out(struct perf_event_context *ctx, | ||
1040 | struct perf_cpu_context *cpuctx, | ||
1041 | enum event_type_t event_type) | ||
1048 | { | 1042 | { |
1049 | struct perf_event *event; | 1043 | struct perf_event *event; |
1050 | 1044 | ||
@@ -1055,10 +1049,18 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1055 | update_context_time(ctx); | 1049 | update_context_time(ctx); |
1056 | 1050 | ||
1057 | perf_disable(); | 1051 | perf_disable(); |
1058 | if (ctx->nr_active) { | 1052 | if (!ctx->nr_active) |
1059 | list_for_each_entry(event, &ctx->group_list, group_entry) | 1053 | goto out_enable; |
1054 | |||
1055 | if (event_type & EVENT_PINNED) | ||
1056 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | ||
1060 | group_sched_out(event, cpuctx, ctx); | 1057 | group_sched_out(event, cpuctx, ctx); |
1061 | } | 1058 | |
1059 | if (event_type & EVENT_FLEXIBLE) | ||
1060 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | ||
1061 | group_sched_out(event, cpuctx, ctx); | ||
1062 | |||
1063 | out_enable: | ||
1062 | perf_enable(); | 1064 | perf_enable(); |
1063 | out: | 1065 | out: |
1064 | raw_spin_unlock(&ctx->lock); | 1066 | raw_spin_unlock(&ctx->lock); |
@@ -1170,9 +1172,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1170 | * not restart the event. | 1172 | * not restart the event. |
1171 | */ | 1173 | */ |
1172 | void perf_event_task_sched_out(struct task_struct *task, | 1174 | void perf_event_task_sched_out(struct task_struct *task, |
1173 | struct task_struct *next, int cpu) | 1175 | struct task_struct *next) |
1174 | { | 1176 | { |
1175 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1177 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1176 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1178 | struct perf_event_context *ctx = task->perf_event_ctxp; |
1177 | struct perf_event_context *next_ctx; | 1179 | struct perf_event_context *next_ctx; |
1178 | struct perf_event_context *parent; | 1180 | struct perf_event_context *parent; |
@@ -1220,15 +1222,13 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1220 | rcu_read_unlock(); | 1222 | rcu_read_unlock(); |
1221 | 1223 | ||
1222 | if (do_switch) { | 1224 | if (do_switch) { |
1223 | __perf_event_sched_out(ctx, cpuctx); | 1225 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
1224 | cpuctx->task_ctx = NULL; | 1226 | cpuctx->task_ctx = NULL; |
1225 | } | 1227 | } |
1226 | } | 1228 | } |
1227 | 1229 | ||
1228 | /* | 1230 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
1229 | * Called with IRQs disabled | 1231 | enum event_type_t event_type) |
1230 | */ | ||
1231 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) | ||
1232 | { | 1232 | { |
1233 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1233 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1234 | 1234 | ||
@@ -1238,47 +1238,41 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx) | |||
1238 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | 1238 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) |
1239 | return; | 1239 | return; |
1240 | 1240 | ||
1241 | __perf_event_sched_out(ctx, cpuctx); | 1241 | ctx_sched_out(ctx, cpuctx, event_type); |
1242 | cpuctx->task_ctx = NULL; | 1242 | cpuctx->task_ctx = NULL; |
1243 | } | 1243 | } |
1244 | 1244 | ||
1245 | /* | 1245 | /* |
1246 | * Called with IRQs disabled | 1246 | * Called with IRQs disabled |
1247 | */ | 1247 | */ |
1248 | static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) | 1248 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) |
1249 | { | ||
1250 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
1251 | } | ||
1252 | |||
1253 | /* | ||
1254 | * Called with IRQs disabled | ||
1255 | */ | ||
1256 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
1257 | enum event_type_t event_type) | ||
1249 | { | 1258 | { |
1250 | __perf_event_sched_out(&cpuctx->ctx, cpuctx); | 1259 | ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); |
1251 | } | 1260 | } |
1252 | 1261 | ||
1253 | static void | 1262 | static void |
1254 | __perf_event_sched_in(struct perf_event_context *ctx, | 1263 | ctx_pinned_sched_in(struct perf_event_context *ctx, |
1255 | struct perf_cpu_context *cpuctx, int cpu) | 1264 | struct perf_cpu_context *cpuctx) |
1256 | { | 1265 | { |
1257 | struct perf_event *event; | 1266 | struct perf_event *event; |
1258 | int can_add_hw = 1; | ||
1259 | |||
1260 | raw_spin_lock(&ctx->lock); | ||
1261 | ctx->is_active = 1; | ||
1262 | if (likely(!ctx->nr_events)) | ||
1263 | goto out; | ||
1264 | |||
1265 | ctx->timestamp = perf_clock(); | ||
1266 | |||
1267 | perf_disable(); | ||
1268 | 1267 | ||
1269 | /* | 1268 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1270 | * First go through the list and put on any pinned groups | 1269 | if (event->state <= PERF_EVENT_STATE_OFF) |
1271 | * in order to give them the best chance of going on. | ||
1272 | */ | ||
1273 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
1274 | if (event->state <= PERF_EVENT_STATE_OFF || | ||
1275 | !event->attr.pinned) | ||
1276 | continue; | 1270 | continue; |
1277 | if (event->cpu != -1 && event->cpu != cpu) | 1271 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1278 | continue; | 1272 | continue; |
1279 | 1273 | ||
1280 | if (group_can_go_on(event, cpuctx, 1)) | 1274 | if (group_can_go_on(event, cpuctx, 1)) |
1281 | group_sched_in(event, cpuctx, ctx, cpu); | 1275 | group_sched_in(event, cpuctx, ctx); |
1282 | 1276 | ||
1283 | /* | 1277 | /* |
1284 | * If this pinned group hasn't been scheduled, | 1278 | * If this pinned group hasn't been scheduled, |
@@ -1289,32 +1283,83 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1289 | event->state = PERF_EVENT_STATE_ERROR; | 1283 | event->state = PERF_EVENT_STATE_ERROR; |
1290 | } | 1284 | } |
1291 | } | 1285 | } |
1286 | } | ||
1292 | 1287 | ||
1293 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1288 | static void |
1294 | /* | 1289 | ctx_flexible_sched_in(struct perf_event_context *ctx, |
1295 | * Ignore events in OFF or ERROR state, and | 1290 | struct perf_cpu_context *cpuctx) |
1296 | * ignore pinned events since we did them already. | 1291 | { |
1297 | */ | 1292 | struct perf_event *event; |
1298 | if (event->state <= PERF_EVENT_STATE_OFF || | 1293 | int can_add_hw = 1; |
1299 | event->attr.pinned) | ||
1300 | continue; | ||
1301 | 1294 | ||
1295 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { | ||
1296 | /* Ignore events in OFF or ERROR state */ | ||
1297 | if (event->state <= PERF_EVENT_STATE_OFF) | ||
1298 | continue; | ||
1302 | /* | 1299 | /* |
1303 | * Listen to the 'cpu' scheduling filter constraint | 1300 | * Listen to the 'cpu' scheduling filter constraint |
1304 | * of events: | 1301 | * of events: |
1305 | */ | 1302 | */ |
1306 | if (event->cpu != -1 && event->cpu != cpu) | 1303 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1307 | continue; | 1304 | continue; |
1308 | 1305 | ||
1309 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1306 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
1310 | if (group_sched_in(event, cpuctx, ctx, cpu)) | 1307 | if (group_sched_in(event, cpuctx, ctx)) |
1311 | can_add_hw = 0; | 1308 | can_add_hw = 0; |
1312 | } | 1309 | } |
1310 | } | ||
1311 | |||
1312 | static void | ||
1313 | ctx_sched_in(struct perf_event_context *ctx, | ||
1314 | struct perf_cpu_context *cpuctx, | ||
1315 | enum event_type_t event_type) | ||
1316 | { | ||
1317 | raw_spin_lock(&ctx->lock); | ||
1318 | ctx->is_active = 1; | ||
1319 | if (likely(!ctx->nr_events)) | ||
1320 | goto out; | ||
1321 | |||
1322 | ctx->timestamp = perf_clock(); | ||
1323 | |||
1324 | perf_disable(); | ||
1325 | |||
1326 | /* | ||
1327 | * First go through the list and put on any pinned groups | ||
1328 | * in order to give them the best chance of going on. | ||
1329 | */ | ||
1330 | if (event_type & EVENT_PINNED) | ||
1331 | ctx_pinned_sched_in(ctx, cpuctx); | ||
1332 | |||
1333 | /* Then walk through the lower prio flexible groups */ | ||
1334 | if (event_type & EVENT_FLEXIBLE) | ||
1335 | ctx_flexible_sched_in(ctx, cpuctx); | ||
1336 | |||
1313 | perf_enable(); | 1337 | perf_enable(); |
1314 | out: | 1338 | out: |
1315 | raw_spin_unlock(&ctx->lock); | 1339 | raw_spin_unlock(&ctx->lock); |
1316 | } | 1340 | } |
1317 | 1341 | ||
1342 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
1343 | enum event_type_t event_type) | ||
1344 | { | ||
1345 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
1346 | |||
1347 | ctx_sched_in(ctx, cpuctx, event_type); | ||
1348 | } | ||
1349 | |||
1350 | static void task_ctx_sched_in(struct task_struct *task, | ||
1351 | enum event_type_t event_type) | ||
1352 | { | ||
1353 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1354 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1355 | |||
1356 | if (likely(!ctx)) | ||
1357 | return; | ||
1358 | if (cpuctx->task_ctx == ctx) | ||
1359 | return; | ||
1360 | ctx_sched_in(ctx, cpuctx, event_type); | ||
1361 | cpuctx->task_ctx = ctx; | ||
1362 | } | ||
1318 | /* | 1363 | /* |
1319 | * Called from scheduler to add the events of the current task | 1364 | * Called from scheduler to add the events of the current task |
1320 | * with interrupts disabled. | 1365 | * with interrupts disabled. |
@@ -1326,38 +1371,128 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1326 | * accessing the event control register. If a NMI hits, then it will | 1371 | * accessing the event control register. If a NMI hits, then it will |
1327 | * keep the event running. | 1372 | * keep the event running. |
1328 | */ | 1373 | */ |
1329 | void perf_event_task_sched_in(struct task_struct *task, int cpu) | 1374 | void perf_event_task_sched_in(struct task_struct *task) |
1330 | { | 1375 | { |
1331 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1376 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1332 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1377 | struct perf_event_context *ctx = task->perf_event_ctxp; |
1333 | 1378 | ||
1334 | if (likely(!ctx)) | 1379 | if (likely(!ctx)) |
1335 | return; | 1380 | return; |
1381 | |||
1336 | if (cpuctx->task_ctx == ctx) | 1382 | if (cpuctx->task_ctx == ctx) |
1337 | return; | 1383 | return; |
1338 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1384 | |
1385 | /* | ||
1386 | * We want to keep the following priority order: | ||
1387 | * cpu pinned (that don't need to move), task pinned, | ||
1388 | * cpu flexible, task flexible. | ||
1389 | */ | ||
1390 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | ||
1391 | |||
1392 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED); | ||
1393 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | ||
1394 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); | ||
1395 | |||
1339 | cpuctx->task_ctx = ctx; | 1396 | cpuctx->task_ctx = ctx; |
1340 | } | 1397 | } |
1341 | 1398 | ||
1342 | static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | 1399 | #define MAX_INTERRUPTS (~0ULL) |
1400 | |||
1401 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
1402 | |||
1403 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | ||
1343 | { | 1404 | { |
1344 | struct perf_event_context *ctx = &cpuctx->ctx; | 1405 | u64 frequency = event->attr.sample_freq; |
1406 | u64 sec = NSEC_PER_SEC; | ||
1407 | u64 divisor, dividend; | ||
1408 | |||
1409 | int count_fls, nsec_fls, frequency_fls, sec_fls; | ||
1410 | |||
1411 | count_fls = fls64(count); | ||
1412 | nsec_fls = fls64(nsec); | ||
1413 | frequency_fls = fls64(frequency); | ||
1414 | sec_fls = 30; | ||
1415 | |||
1416 | /* | ||
1417 | * We got @count in @nsec, with a target of sample_freq HZ | ||
1418 | * the target period becomes: | ||
1419 | * | ||
1420 | * @count * 10^9 | ||
1421 | * period = ------------------- | ||
1422 | * @nsec * sample_freq | ||
1423 | * | ||
1424 | */ | ||
1425 | |||
1426 | /* | ||
1427 | * Reduce accuracy by one bit such that @a and @b converge | ||
1428 | * to a similar magnitude. | ||
1429 | */ | ||
1430 | #define REDUCE_FLS(a, b) \ | ||
1431 | do { \ | ||
1432 | if (a##_fls > b##_fls) { \ | ||
1433 | a >>= 1; \ | ||
1434 | a##_fls--; \ | ||
1435 | } else { \ | ||
1436 | b >>= 1; \ | ||
1437 | b##_fls--; \ | ||
1438 | } \ | ||
1439 | } while (0) | ||
1440 | |||
1441 | /* | ||
1442 | * Reduce accuracy until either term fits in a u64, then proceed with | ||
1443 | * the other, so that finally we can do a u64/u64 division. | ||
1444 | */ | ||
1445 | while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { | ||
1446 | REDUCE_FLS(nsec, frequency); | ||
1447 | REDUCE_FLS(sec, count); | ||
1448 | } | ||
1449 | |||
1450 | if (count_fls + sec_fls > 64) { | ||
1451 | divisor = nsec * frequency; | ||
1452 | |||
1453 | while (count_fls + sec_fls > 64) { | ||
1454 | REDUCE_FLS(count, sec); | ||
1455 | divisor >>= 1; | ||
1456 | } | ||
1345 | 1457 | ||
1346 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1458 | dividend = count * sec; |
1459 | } else { | ||
1460 | dividend = count * sec; | ||
1461 | |||
1462 | while (nsec_fls + frequency_fls > 64) { | ||
1463 | REDUCE_FLS(nsec, frequency); | ||
1464 | dividend >>= 1; | ||
1465 | } | ||
1466 | |||
1467 | divisor = nsec * frequency; | ||
1468 | } | ||
1469 | |||
1470 | return div64_u64(dividend, divisor); | ||
1347 | } | 1471 | } |
1348 | 1472 | ||
1349 | #define MAX_INTERRUPTS (~0ULL) | 1473 | static void perf_event_stop(struct perf_event *event) |
1474 | { | ||
1475 | if (!event->pmu->stop) | ||
1476 | return event->pmu->disable(event); | ||
1350 | 1477 | ||
1351 | static void perf_log_throttle(struct perf_event *event, int enable); | 1478 | return event->pmu->stop(event); |
1479 | } | ||
1480 | |||
1481 | static int perf_event_start(struct perf_event *event) | ||
1482 | { | ||
1483 | if (!event->pmu->start) | ||
1484 | return event->pmu->enable(event); | ||
1352 | 1485 | ||
1353 | static void perf_adjust_period(struct perf_event *event, u64 events) | 1486 | return event->pmu->start(event); |
1487 | } | ||
1488 | |||
1489 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | ||
1354 | { | 1490 | { |
1355 | struct hw_perf_event *hwc = &event->hw; | 1491 | struct hw_perf_event *hwc = &event->hw; |
1356 | u64 period, sample_period; | 1492 | u64 period, sample_period; |
1357 | s64 delta; | 1493 | s64 delta; |
1358 | 1494 | ||
1359 | events *= hwc->sample_period; | 1495 | period = perf_calculate_period(event, nsec, count); |
1360 | period = div64_u64(events, event->attr.sample_freq); | ||
1361 | 1496 | ||
1362 | delta = (s64)(period - hwc->sample_period); | 1497 | delta = (s64)(period - hwc->sample_period); |
1363 | delta = (delta + 7) / 8; /* low pass filter */ | 1498 | delta = (delta + 7) / 8; /* low pass filter */ |
@@ -1368,13 +1503,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events) | |||
1368 | sample_period = 1; | 1503 | sample_period = 1; |
1369 | 1504 | ||
1370 | hwc->sample_period = sample_period; | 1505 | hwc->sample_period = sample_period; |
1506 | |||
1507 | if (atomic64_read(&hwc->period_left) > 8*sample_period) { | ||
1508 | perf_disable(); | ||
1509 | perf_event_stop(event); | ||
1510 | atomic64_set(&hwc->period_left, 0); | ||
1511 | perf_event_start(event); | ||
1512 | perf_enable(); | ||
1513 | } | ||
1371 | } | 1514 | } |
1372 | 1515 | ||
1373 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1516 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) |
1374 | { | 1517 | { |
1375 | struct perf_event *event; | 1518 | struct perf_event *event; |
1376 | struct hw_perf_event *hwc; | 1519 | struct hw_perf_event *hwc; |
1377 | u64 interrupts, freq; | 1520 | u64 interrupts, now; |
1521 | s64 delta; | ||
1378 | 1522 | ||
1379 | raw_spin_lock(&ctx->lock); | 1523 | raw_spin_lock(&ctx->lock); |
1380 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 1524 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
@@ -1395,44 +1539,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1395 | if (interrupts == MAX_INTERRUPTS) { | 1539 | if (interrupts == MAX_INTERRUPTS) { |
1396 | perf_log_throttle(event, 1); | 1540 | perf_log_throttle(event, 1); |
1397 | event->pmu->unthrottle(event); | 1541 | event->pmu->unthrottle(event); |
1398 | interrupts = 2*sysctl_perf_event_sample_rate/HZ; | ||
1399 | } | 1542 | } |
1400 | 1543 | ||
1401 | if (!event->attr.freq || !event->attr.sample_freq) | 1544 | if (!event->attr.freq || !event->attr.sample_freq) |
1402 | continue; | 1545 | continue; |
1403 | 1546 | ||
1404 | /* | 1547 | event->pmu->read(event); |
1405 | * if the specified freq < HZ then we need to skip ticks | 1548 | now = atomic64_read(&event->count); |
1406 | */ | 1549 | delta = now - hwc->freq_count_stamp; |
1407 | if (event->attr.sample_freq < HZ) { | 1550 | hwc->freq_count_stamp = now; |
1408 | freq = event->attr.sample_freq; | ||
1409 | |||
1410 | hwc->freq_count += freq; | ||
1411 | hwc->freq_interrupts += interrupts; | ||
1412 | |||
1413 | if (hwc->freq_count < HZ) | ||
1414 | continue; | ||
1415 | |||
1416 | interrupts = hwc->freq_interrupts; | ||
1417 | hwc->freq_interrupts = 0; | ||
1418 | hwc->freq_count -= HZ; | ||
1419 | } else | ||
1420 | freq = HZ; | ||
1421 | |||
1422 | perf_adjust_period(event, freq * interrupts); | ||
1423 | 1551 | ||
1424 | /* | 1552 | if (delta > 0) |
1425 | * In order to avoid being stalled by an (accidental) huge | 1553 | perf_adjust_period(event, TICK_NSEC, delta); |
1426 | * sample period, force reset the sample period if we didn't | ||
1427 | * get any events in this freq period. | ||
1428 | */ | ||
1429 | if (!interrupts) { | ||
1430 | perf_disable(); | ||
1431 | event->pmu->disable(event); | ||
1432 | atomic64_set(&hwc->period_left, 0); | ||
1433 | event->pmu->enable(event); | ||
1434 | perf_enable(); | ||
1435 | } | ||
1436 | } | 1554 | } |
1437 | raw_spin_unlock(&ctx->lock); | 1555 | raw_spin_unlock(&ctx->lock); |
1438 | } | 1556 | } |
@@ -1442,26 +1560,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1442 | */ | 1560 | */ |
1443 | static void rotate_ctx(struct perf_event_context *ctx) | 1561 | static void rotate_ctx(struct perf_event_context *ctx) |
1444 | { | 1562 | { |
1445 | struct perf_event *event; | ||
1446 | |||
1447 | if (!ctx->nr_events) | 1563 | if (!ctx->nr_events) |
1448 | return; | 1564 | return; |
1449 | 1565 | ||
1450 | raw_spin_lock(&ctx->lock); | 1566 | raw_spin_lock(&ctx->lock); |
1451 | /* | 1567 | |
1452 | * Rotate the first entry last (works just fine for group events too): | 1568 | /* Rotate the first entry last of non-pinned groups */ |
1453 | */ | 1569 | list_rotate_left(&ctx->flexible_groups); |
1454 | perf_disable(); | ||
1455 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
1456 | list_move_tail(&event->group_entry, &ctx->group_list); | ||
1457 | break; | ||
1458 | } | ||
1459 | perf_enable(); | ||
1460 | 1570 | ||
1461 | raw_spin_unlock(&ctx->lock); | 1571 | raw_spin_unlock(&ctx->lock); |
1462 | } | 1572 | } |
1463 | 1573 | ||
1464 | void perf_event_task_tick(struct task_struct *curr, int cpu) | 1574 | void perf_event_task_tick(struct task_struct *curr) |
1465 | { | 1575 | { |
1466 | struct perf_cpu_context *cpuctx; | 1576 | struct perf_cpu_context *cpuctx; |
1467 | struct perf_event_context *ctx; | 1577 | struct perf_event_context *ctx; |
@@ -1469,24 +1579,43 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) | |||
1469 | if (!atomic_read(&nr_events)) | 1579 | if (!atomic_read(&nr_events)) |
1470 | return; | 1580 | return; |
1471 | 1581 | ||
1472 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1582 | cpuctx = &__get_cpu_var(perf_cpu_context); |
1473 | ctx = curr->perf_event_ctxp; | 1583 | ctx = curr->perf_event_ctxp; |
1474 | 1584 | ||
1585 | perf_disable(); | ||
1586 | |||
1475 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1587 | perf_ctx_adjust_freq(&cpuctx->ctx); |
1476 | if (ctx) | 1588 | if (ctx) |
1477 | perf_ctx_adjust_freq(ctx); | 1589 | perf_ctx_adjust_freq(ctx); |
1478 | 1590 | ||
1479 | perf_event_cpu_sched_out(cpuctx); | 1591 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
1480 | if (ctx) | 1592 | if (ctx) |
1481 | __perf_event_task_sched_out(ctx); | 1593 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); |
1482 | 1594 | ||
1483 | rotate_ctx(&cpuctx->ctx); | 1595 | rotate_ctx(&cpuctx->ctx); |
1484 | if (ctx) | 1596 | if (ctx) |
1485 | rotate_ctx(ctx); | 1597 | rotate_ctx(ctx); |
1486 | 1598 | ||
1487 | perf_event_cpu_sched_in(cpuctx, cpu); | 1599 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
1488 | if (ctx) | 1600 | if (ctx) |
1489 | perf_event_task_sched_in(curr, cpu); | 1601 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); |
1602 | |||
1603 | perf_enable(); | ||
1604 | } | ||
1605 | |||
1606 | static int event_enable_on_exec(struct perf_event *event, | ||
1607 | struct perf_event_context *ctx) | ||
1608 | { | ||
1609 | if (!event->attr.enable_on_exec) | ||
1610 | return 0; | ||
1611 | |||
1612 | event->attr.enable_on_exec = 0; | ||
1613 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | ||
1614 | return 0; | ||
1615 | |||
1616 | __perf_event_mark_enabled(event, ctx); | ||
1617 | |||
1618 | return 1; | ||
1490 | } | 1619 | } |
1491 | 1620 | ||
1492 | /* | 1621 | /* |
@@ -1499,6 +1628,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1499 | struct perf_event *event; | 1628 | struct perf_event *event; |
1500 | unsigned long flags; | 1629 | unsigned long flags; |
1501 | int enabled = 0; | 1630 | int enabled = 0; |
1631 | int ret; | ||
1502 | 1632 | ||
1503 | local_irq_save(flags); | 1633 | local_irq_save(flags); |
1504 | ctx = task->perf_event_ctxp; | 1634 | ctx = task->perf_event_ctxp; |
@@ -1509,14 +1639,16 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1509 | 1639 | ||
1510 | raw_spin_lock(&ctx->lock); | 1640 | raw_spin_lock(&ctx->lock); |
1511 | 1641 | ||
1512 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1642 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1513 | if (!event->attr.enable_on_exec) | 1643 | ret = event_enable_on_exec(event, ctx); |
1514 | continue; | 1644 | if (ret) |
1515 | event->attr.enable_on_exec = 0; | 1645 | enabled = 1; |
1516 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | 1646 | } |
1517 | continue; | 1647 | |
1518 | __perf_event_mark_enabled(event, ctx); | 1648 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { |
1519 | enabled = 1; | 1649 | ret = event_enable_on_exec(event, ctx); |
1650 | if (ret) | ||
1651 | enabled = 1; | ||
1520 | } | 1652 | } |
1521 | 1653 | ||
1522 | /* | 1654 | /* |
@@ -1527,7 +1659,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1527 | 1659 | ||
1528 | raw_spin_unlock(&ctx->lock); | 1660 | raw_spin_unlock(&ctx->lock); |
1529 | 1661 | ||
1530 | perf_event_task_sched_in(task, smp_processor_id()); | 1662 | perf_event_task_sched_in(task); |
1531 | out: | 1663 | out: |
1532 | local_irq_restore(flags); | 1664 | local_irq_restore(flags); |
1533 | } | 1665 | } |
@@ -1590,7 +1722,8 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
1590 | { | 1722 | { |
1591 | raw_spin_lock_init(&ctx->lock); | 1723 | raw_spin_lock_init(&ctx->lock); |
1592 | mutex_init(&ctx->mutex); | 1724 | mutex_init(&ctx->mutex); |
1593 | INIT_LIST_HEAD(&ctx->group_list); | 1725 | INIT_LIST_HEAD(&ctx->pinned_groups); |
1726 | INIT_LIST_HEAD(&ctx->flexible_groups); | ||
1594 | INIT_LIST_HEAD(&ctx->event_list); | 1727 | INIT_LIST_HEAD(&ctx->event_list); |
1595 | atomic_set(&ctx->refcount, 1); | 1728 | atomic_set(&ctx->refcount, 1); |
1596 | ctx->task = task; | 1729 | ctx->task = task; |
@@ -3608,7 +3741,7 @@ void __perf_event_mmap(struct vm_area_struct *vma) | |||
3608 | /* .tid */ | 3741 | /* .tid */ |
3609 | .start = vma->vm_start, | 3742 | .start = vma->vm_start, |
3610 | .len = vma->vm_end - vma->vm_start, | 3743 | .len = vma->vm_end - vma->vm_start, |
3611 | .pgoff = vma->vm_pgoff, | 3744 | .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, |
3612 | }, | 3745 | }, |
3613 | }; | 3746 | }; |
3614 | 3747 | ||
@@ -3688,12 +3821,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3688 | 3821 | ||
3689 | if (event->attr.freq) { | 3822 | if (event->attr.freq) { |
3690 | u64 now = perf_clock(); | 3823 | u64 now = perf_clock(); |
3691 | s64 delta = now - hwc->freq_stamp; | 3824 | s64 delta = now - hwc->freq_time_stamp; |
3692 | 3825 | ||
3693 | hwc->freq_stamp = now; | 3826 | hwc->freq_time_stamp = now; |
3694 | 3827 | ||
3695 | if (delta > 0 && delta < TICK_NSEC) | 3828 | if (delta > 0 && delta < 2*TICK_NSEC) |
3696 | perf_adjust_period(event, NSEC_PER_SEC / (int)delta); | 3829 | perf_adjust_period(event, delta, hwc->last_period); |
3697 | } | 3830 | } |
3698 | 3831 | ||
3699 | /* | 3832 | /* |
@@ -4184,7 +4317,7 @@ static const struct pmu perf_ops_task_clock = { | |||
4184 | .read = task_clock_perf_event_read, | 4317 | .read = task_clock_perf_event_read, |
4185 | }; | 4318 | }; |
4186 | 4319 | ||
4187 | #ifdef CONFIG_EVENT_PROFILE | 4320 | #ifdef CONFIG_EVENT_TRACING |
4188 | 4321 | ||
4189 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4322 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
4190 | int entry_size) | 4323 | int entry_size) |
@@ -4289,7 +4422,7 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4289 | { | 4422 | { |
4290 | } | 4423 | } |
4291 | 4424 | ||
4292 | #endif /* CONFIG_EVENT_PROFILE */ | 4425 | #endif /* CONFIG_EVENT_TRACING */ |
4293 | 4426 | ||
4294 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4427 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
4295 | static void bp_perf_event_destroy(struct perf_event *event) | 4428 | static void bp_perf_event_destroy(struct perf_event *event) |
@@ -4870,8 +5003,15 @@ inherit_event(struct perf_event *parent_event, | |||
4870 | else | 5003 | else |
4871 | child_event->state = PERF_EVENT_STATE_OFF; | 5004 | child_event->state = PERF_EVENT_STATE_OFF; |
4872 | 5005 | ||
4873 | if (parent_event->attr.freq) | 5006 | if (parent_event->attr.freq) { |
4874 | child_event->hw.sample_period = parent_event->hw.sample_period; | 5007 | u64 sample_period = parent_event->hw.sample_period; |
5008 | struct hw_perf_event *hwc = &child_event->hw; | ||
5009 | |||
5010 | hwc->sample_period = sample_period; | ||
5011 | hwc->last_period = sample_period; | ||
5012 | |||
5013 | atomic64_set(&hwc->period_left, sample_period); | ||
5014 | } | ||
4875 | 5015 | ||
4876 | child_event->overflow_handler = parent_event->overflow_handler; | 5016 | child_event->overflow_handler = parent_event->overflow_handler; |
4877 | 5017 | ||
@@ -5039,7 +5179,11 @@ void perf_event_exit_task(struct task_struct *child) | |||
5039 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); | 5179 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); |
5040 | 5180 | ||
5041 | again: | 5181 | again: |
5042 | list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, | 5182 | list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, |
5183 | group_entry) | ||
5184 | __perf_event_exit_task(child_event, child_ctx, child); | ||
5185 | |||
5186 | list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, | ||
5043 | group_entry) | 5187 | group_entry) |
5044 | __perf_event_exit_task(child_event, child_ctx, child); | 5188 | __perf_event_exit_task(child_event, child_ctx, child); |
5045 | 5189 | ||
@@ -5048,7 +5192,8 @@ again: | |||
5048 | * its siblings to the list, but we obtained 'tmp' before that which | 5192 | * its siblings to the list, but we obtained 'tmp' before that which |
5049 | * will still point to the list head terminating the iteration. | 5193 | * will still point to the list head terminating the iteration. |
5050 | */ | 5194 | */ |
5051 | if (!list_empty(&child_ctx->group_list)) | 5195 | if (!list_empty(&child_ctx->pinned_groups) || |
5196 | !list_empty(&child_ctx->flexible_groups)) | ||
5052 | goto again; | 5197 | goto again; |
5053 | 5198 | ||
5054 | mutex_unlock(&child_ctx->mutex); | 5199 | mutex_unlock(&child_ctx->mutex); |
@@ -5056,6 +5201,24 @@ again: | |||
5056 | put_ctx(child_ctx); | 5201 | put_ctx(child_ctx); |
5057 | } | 5202 | } |
5058 | 5203 | ||
5204 | static void perf_free_event(struct perf_event *event, | ||
5205 | struct perf_event_context *ctx) | ||
5206 | { | ||
5207 | struct perf_event *parent = event->parent; | ||
5208 | |||
5209 | if (WARN_ON_ONCE(!parent)) | ||
5210 | return; | ||
5211 | |||
5212 | mutex_lock(&parent->child_mutex); | ||
5213 | list_del_init(&event->child_list); | ||
5214 | mutex_unlock(&parent->child_mutex); | ||
5215 | |||
5216 | fput(parent->filp); | ||
5217 | |||
5218 | list_del_event(event, ctx); | ||
5219 | free_event(event); | ||
5220 | } | ||
5221 | |||
5059 | /* | 5222 | /* |
5060 | * free an unexposed, unused context as created by inheritance by | 5223 | * free an unexposed, unused context as created by inheritance by |
5061 | * init_task below, used by fork() in case of fail. | 5224 | * init_task below, used by fork() in case of fail. |
@@ -5070,36 +5233,70 @@ void perf_event_free_task(struct task_struct *task) | |||
5070 | 5233 | ||
5071 | mutex_lock(&ctx->mutex); | 5234 | mutex_lock(&ctx->mutex); |
5072 | again: | 5235 | again: |
5073 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { | 5236 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5074 | struct perf_event *parent = event->parent; | 5237 | perf_free_event(event, ctx); |
5075 | 5238 | ||
5076 | if (WARN_ON_ONCE(!parent)) | 5239 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
5077 | continue; | 5240 | group_entry) |
5241 | perf_free_event(event, ctx); | ||
5242 | |||
5243 | if (!list_empty(&ctx->pinned_groups) || | ||
5244 | !list_empty(&ctx->flexible_groups)) | ||
5245 | goto again; | ||
5078 | 5246 | ||
5079 | mutex_lock(&parent->child_mutex); | 5247 | mutex_unlock(&ctx->mutex); |
5080 | list_del_init(&event->child_list); | ||
5081 | mutex_unlock(&parent->child_mutex); | ||
5082 | 5248 | ||
5083 | fput(parent->filp); | 5249 | put_ctx(ctx); |
5250 | } | ||
5084 | 5251 | ||
5085 | list_del_event(event, ctx); | 5252 | static int |
5086 | free_event(event); | 5253 | inherit_task_group(struct perf_event *event, struct task_struct *parent, |
5254 | struct perf_event_context *parent_ctx, | ||
5255 | struct task_struct *child, | ||
5256 | int *inherited_all) | ||
5257 | { | ||
5258 | int ret; | ||
5259 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | ||
5260 | |||
5261 | if (!event->attr.inherit) { | ||
5262 | *inherited_all = 0; | ||
5263 | return 0; | ||
5087 | } | 5264 | } |
5088 | 5265 | ||
5089 | if (!list_empty(&ctx->group_list)) | 5266 | if (!child_ctx) { |
5090 | goto again; | 5267 | /* |
5268 | * This is executed from the parent task context, so | ||
5269 | * inherit events that have been marked for cloning. | ||
5270 | * First allocate and initialize a context for the | ||
5271 | * child. | ||
5272 | */ | ||
5091 | 5273 | ||
5092 | mutex_unlock(&ctx->mutex); | 5274 | child_ctx = kzalloc(sizeof(struct perf_event_context), |
5275 | GFP_KERNEL); | ||
5276 | if (!child_ctx) | ||
5277 | return -ENOMEM; | ||
5093 | 5278 | ||
5094 | put_ctx(ctx); | 5279 | __perf_event_init_context(child_ctx, child); |
5280 | child->perf_event_ctxp = child_ctx; | ||
5281 | get_task_struct(child); | ||
5282 | } | ||
5283 | |||
5284 | ret = inherit_group(event, parent, parent_ctx, | ||
5285 | child, child_ctx); | ||
5286 | |||
5287 | if (ret) | ||
5288 | *inherited_all = 0; | ||
5289 | |||
5290 | return ret; | ||
5095 | } | 5291 | } |
5096 | 5292 | ||
5293 | |||
5097 | /* | 5294 | /* |
5098 | * Initialize the perf_event context in task_struct | 5295 | * Initialize the perf_event context in task_struct |
5099 | */ | 5296 | */ |
5100 | int perf_event_init_task(struct task_struct *child) | 5297 | int perf_event_init_task(struct task_struct *child) |
5101 | { | 5298 | { |
5102 | struct perf_event_context *child_ctx = NULL, *parent_ctx; | 5299 | struct perf_event_context *child_ctx, *parent_ctx; |
5103 | struct perf_event_context *cloned_ctx; | 5300 | struct perf_event_context *cloned_ctx; |
5104 | struct perf_event *event; | 5301 | struct perf_event *event; |
5105 | struct task_struct *parent = current; | 5302 | struct task_struct *parent = current; |
@@ -5137,41 +5334,22 @@ int perf_event_init_task(struct task_struct *child) | |||
5137 | * We dont have to disable NMIs - we are only looking at | 5334 | * We dont have to disable NMIs - we are only looking at |
5138 | * the list, not manipulating it: | 5335 | * the list, not manipulating it: |
5139 | */ | 5336 | */ |
5140 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { | 5337 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
5141 | 5338 | ret = inherit_task_group(event, parent, parent_ctx, child, | |
5142 | if (!event->attr.inherit) { | 5339 | &inherited_all); |
5143 | inherited_all = 0; | 5340 | if (ret) |
5144 | continue; | 5341 | break; |
5145 | } | 5342 | } |
5146 | |||
5147 | if (!child->perf_event_ctxp) { | ||
5148 | /* | ||
5149 | * This is executed from the parent task context, so | ||
5150 | * inherit events that have been marked for cloning. | ||
5151 | * First allocate and initialize a context for the | ||
5152 | * child. | ||
5153 | */ | ||
5154 | |||
5155 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
5156 | GFP_KERNEL); | ||
5157 | if (!child_ctx) { | ||
5158 | ret = -ENOMEM; | ||
5159 | break; | ||
5160 | } | ||
5161 | |||
5162 | __perf_event_init_context(child_ctx, child); | ||
5163 | child->perf_event_ctxp = child_ctx; | ||
5164 | get_task_struct(child); | ||
5165 | } | ||
5166 | 5343 | ||
5167 | ret = inherit_group(event, parent, parent_ctx, | 5344 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
5168 | child, child_ctx); | 5345 | ret = inherit_task_group(event, parent, parent_ctx, child, |
5169 | if (ret) { | 5346 | &inherited_all); |
5170 | inherited_all = 0; | 5347 | if (ret) |
5171 | break; | 5348 | break; |
5172 | } | ||
5173 | } | 5349 | } |
5174 | 5350 | ||
5351 | child_ctx = child->perf_event_ctxp; | ||
5352 | |||
5175 | if (child_ctx && inherited_all) { | 5353 | if (child_ctx && inherited_all) { |
5176 | /* | 5354 | /* |
5177 | * Mark the child context as a clone of the parent | 5355 | * Mark the child context as a clone of the parent |
@@ -5220,7 +5398,9 @@ static void __perf_event_exit_cpu(void *info) | |||
5220 | struct perf_event_context *ctx = &cpuctx->ctx; | 5398 | struct perf_event_context *ctx = &cpuctx->ctx; |
5221 | struct perf_event *event, *tmp; | 5399 | struct perf_event *event, *tmp; |
5222 | 5400 | ||
5223 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) | 5401 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5402 | __perf_event_remove_from_context(event); | ||
5403 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | ||
5224 | __perf_event_remove_from_context(event); | 5404 | __perf_event_remove_from_context(event); |
5225 | } | 5405 | } |
5226 | static void perf_event_exit_cpu(int cpu) | 5406 | static void perf_event_exit_cpu(int cpu) |
@@ -5258,6 +5438,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
5258 | perf_event_exit_cpu(cpu); | 5438 | perf_event_exit_cpu(cpu); |
5259 | break; | 5439 | break; |
5260 | 5440 | ||
5441 | case CPU_DEAD: | ||
5442 | hw_perf_event_setup_offline(cpu); | ||
5443 | break; | ||
5444 | |||
5261 | default: | 5445 | default: |
5262 | break; | 5446 | break; |
5263 | } | 5447 | } |
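
The perf_event.c hunks above split event inheritance out into inherit_task_group(): events not marked for inheritance clear an inherited_all flag, and the child context is only allocated lazily when the first inheritable event is seen. The following is a minimal userspace sketch of that shape; the struct layouts, inherit_one() and the event table are illustrative stand-ins, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the kernel structures. */
struct event { int inherit; const char *name; };
struct context { int nr_events; };

/*
 * Inherit one event into the child's context, allocating that context
 * lazily on first use.  Clears *inherited_all when an event is skipped,
 * mirroring what inherit_task_group() does.
 */
static int inherit_one(const struct event *ev, struct context **child_ctx,
                       int *inherited_all)
{
    if (!ev->inherit) {
        *inherited_all = 0;
        return 0;
    }

    if (!*child_ctx) {
        *child_ctx = calloc(1, sizeof(**child_ctx));
        if (!*child_ctx)
            return -1;      /* -ENOMEM in the kernel version */
    }

    (*child_ctx)->nr_events++;  /* stand-in for inherit_group() */
    return 0;
}

int main(void)
{
    struct event parent_events[] = {
        { 1, "cycles" }, { 0, "one-shot" }, { 1, "instructions" },
    };
    struct context *child = NULL;
    int inherited_all = 1;
    size_t i;

    for (i = 0; i < sizeof(parent_events) / sizeof(parent_events[0]); i++)
        if (inherit_one(&parent_events[i], &child, &inherited_all))
            break;

    printf("inherited %d events, inherited_all=%d\n",
           child ? child->nr_events : 0, inherited_all);
    free(child);
    return 0;
}

The kernel version runs this once over the pinned groups and once over the flexible groups, which is why the flag lives in the caller rather than in the helper.
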
diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30a91b1..3e71ebb101c2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2794,7 +2794,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2794 | */ | 2794 | */ |
2795 | prev_state = prev->state; | 2795 | prev_state = prev->state; |
2796 | finish_arch_switch(prev); | 2796 | finish_arch_switch(prev); |
2797 | perf_event_task_sched_in(current, cpu_of(rq)); | 2797 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2798 | local_irq_disable(); | ||
2799 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
2800 | perf_event_task_sched_in(current); | ||
2801 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
2802 | local_irq_enable(); | ||
2803 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
2798 | finish_lock_switch(rq, prev); | 2804 | finish_lock_switch(rq, prev); |
2799 | 2805 | ||
2800 | fire_sched_in_preempt_notifiers(current); | 2806 | fire_sched_in_preempt_notifiers(current); |
@@ -5309,7 +5315,7 @@ void scheduler_tick(void) | |||
5309 | curr->sched_class->task_tick(rq, curr, 0); | 5315 | curr->sched_class->task_tick(rq, curr, 0); |
5310 | raw_spin_unlock(&rq->lock); | 5316 | raw_spin_unlock(&rq->lock); |
5311 | 5317 | ||
5312 | perf_event_task_tick(curr, cpu); | 5318 | perf_event_task_tick(curr); |
5313 | 5319 | ||
5314 | #ifdef CONFIG_SMP | 5320 | #ifdef CONFIG_SMP |
5315 | rq->idle_at_tick = idle_cpu(cpu); | 5321 | rq->idle_at_tick = idle_cpu(cpu); |
@@ -5523,7 +5529,7 @@ need_resched_nonpreemptible: | |||
5523 | 5529 | ||
5524 | if (likely(prev != next)) { | 5530 | if (likely(prev != next)) { |
5525 | sched_info_switch(prev, next); | 5531 | sched_info_switch(prev, next); |
5526 | perf_event_task_sched_out(prev, next, cpu); | 5532 | perf_event_task_sched_out(prev, next); |
5527 | 5533 | ||
5528 | rq->nr_switches++; | 5534 | rq->nr_switches++; |
5529 | rq->curr = next; | 5535 | rq->curr = next; |
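
In the sched.c hunk, architectures that define __ARCH_WANT_INTERRUPTS_ON_CTXSW now disable interrupts around perf_event_task_sched_in() so the perf hook always runs with IRQs off. A loose userspace analogy is blocking signals around a call that must not be interrupted; the sketch below only illustrates that guard pattern and is not scheduler code.

#include <signal.h>
#include <stdio.h>

/* Stand-in for the work that must not be interrupted. */
static void critical_update(void)
{
    puts("updating state with signals blocked");
}

int main(void)
{
    sigset_t all, old;

    sigfillset(&all);

    /* Analogue of the local_irq_disable()/local_irq_enable() pair. */
    sigprocmask(SIG_BLOCK, &all, &old);
    critical_update();
    sigprocmask(SIG_SETMASK, &old, NULL);

    return 0;
}
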
diff --git a/kernel/softirq.c b/kernel/softirq.c index a09502e2ef75..7c1a67ef0274 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill); | |||
500 | */ | 500 | */ |
501 | 501 | ||
502 | /* | 502 | /* |
503 | * The trampoline is called when the hrtimer expires. If this is | 503 | * The trampoline is called when the hrtimer expires. It schedules a tasklet |
504 | * called from the hrtimer interrupt then we schedule the tasklet as | 504 | * to run __tasklet_hrtimer_trampoline() which in turn will call the intended |
505 | * the timer callback function expects to run in softirq context. If | 505 | * hrtimer callback, but from softirq context. |
506 | * it's called in softirq context anyway (i.e. high resolution timers | ||
507 | * disabled) then the hrtimer callback is called right away. | ||
508 | */ | 506 | */ |
509 | static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) | 507 | static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) |
510 | { | 508 | { |
511 | struct tasklet_hrtimer *ttimer = | 509 | struct tasklet_hrtimer *ttimer = |
512 | container_of(timer, struct tasklet_hrtimer, timer); | 510 | container_of(timer, struct tasklet_hrtimer, timer); |
513 | 511 | ||
514 | if (hrtimer_is_hres_active(timer)) { | 512 | tasklet_hi_schedule(&ttimer->tasklet); |
515 | tasklet_hi_schedule(&ttimer->tasklet); | 513 | return HRTIMER_NORESTART; |
516 | return HRTIMER_NORESTART; | ||
517 | } | ||
518 | return ttimer->function(timer); | ||
519 | } | 514 | } |
520 | 515 | ||
521 | /* | 516 | /* |
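
The simplified trampoline above always schedules the tasklet, so the user-supplied hrtimer callback consistently runs from softirq context instead of sometimes running in hard interrupt context. A userspace illustration of the same deferral idea is the self-pipe trick: the signal handler only queues a byte and the real callback runs later from the main loop. All names below are illustrative.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static int pipefd[2];

/* "Hard interrupt" context: only queue the work (async-signal-safe). */
static void alarm_handler(int sig)
{
    char c = 0;

    (void)sig;
    write(pipefd[1], &c, 1);
}

/* "Softirq" context: the real callback, run from the main loop. */
static void deferred_callback(void)
{
    puts("timer callback running outside the signal handler");
}

int main(void)
{
    struct sigaction sa;
    char c;

    if (pipe(pipefd))
        return 1;

    sa.sa_handler = alarm_handler;
    sa.sa_flags = SA_RESTART;   /* let read() resume after the signal */
    sigemptyset(&sa.sa_mask);
    sigaction(SIGALRM, &sa, NULL);

    alarm(1);

    if (read(pipefd[0], &c, 1) == 1)
        deferred_callback();

    return 0;
}
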
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d22579087e27..0d4c7898ab80 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock); | |||
25 | static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ | 25 | static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ |
26 | static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ | 26 | static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ |
27 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); | 27 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); |
28 | static DEFINE_PER_CPU(bool, softlock_touch_sync); | ||
28 | 29 | ||
29 | static int __read_mostly did_panic; | 30 | static int __read_mostly did_panic; |
30 | int __read_mostly softlockup_thresh = 60; | 31 | int __read_mostly softlockup_thresh = 60; |
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void) | |||
79 | } | 80 | } |
80 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 81 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
81 | 82 | ||
83 | void touch_softlockup_watchdog_sync(void) | ||
84 | { | ||
85 | __raw_get_cpu_var(softlock_touch_sync) = true; | ||
86 | __raw_get_cpu_var(softlockup_touch_ts) = 0; | ||
87 | } | ||
88 | |||
82 | void touch_all_softlockup_watchdogs(void) | 89 | void touch_all_softlockup_watchdogs(void) |
83 | { | 90 | { |
84 | int cpu; | 91 | int cpu; |
@@ -118,6 +125,14 @@ void softlockup_tick(void) | |||
118 | } | 125 | } |
119 | 126 | ||
120 | if (touch_ts == 0) { | 127 | if (touch_ts == 0) { |
128 | if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { | ||
129 | /* | ||
130 | * If the time stamp was touched atomically | ||
131 | * make sure the scheduler tick is up to date. | ||
132 | */ | ||
133 | per_cpu(softlock_touch_sync, this_cpu) = false; | ||
134 | sched_clock_tick(); | ||
135 | } | ||
121 | __touch_softlockup_watchdog(); | 136 | __touch_softlockup_watchdog(); |
122 | return; | 137 | return; |
123 | } | 138 | } |
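
touch_softlockup_watchdog_sync() pairs the usual timestamp reset with a per-cpu flag that tells the next softlockup_tick() to resynchronize sched_clock before re-arming. A minimal sketch of that handshake, with plain globals standing in for the per-cpu variables and all names chosen purely for illustration:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Stand-ins for the per-cpu variables in kernel/softlockup.c. */
static unsigned long touch_ts;
static bool touch_sync;

static unsigned long now(void)
{
    return (unsigned long)time(NULL);
}

/* Analogue of touch_softlockup_watchdog_sync(). */
static void touch_watchdog_sync(void)
{
    touch_sync = true;
    touch_ts = 0;
}

/* Analogue of the check at the top of softlockup_tick(). */
static void watchdog_tick(void)
{
    if (touch_ts == 0) {
        if (touch_sync) {
            /* Timestamp was touched atomically: resync the clock first. */
            touch_sync = false;
            printf("resyncing clock before re-arming\n");
        }
        touch_ts = now();   /* __touch_softlockup_watchdog() stand-in */
        return;
    }
    printf("checking for lockup since %lu\n", touch_ts);
}

int main(void)
{
    touch_watchdog_sync();
    watchdog_tick();    /* resyncs and re-arms */
    watchdog_tick();    /* normal check */
    return 0;
}
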
diff --git a/kernel/sys.c b/kernel/sys.c index 26a6b73a6b85..18bde979f346 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
222 | if (which > PRIO_USER || which < PRIO_PROCESS) | 222 | if (which > PRIO_USER || which < PRIO_PROCESS) |
223 | return -EINVAL; | 223 | return -EINVAL; |
224 | 224 | ||
225 | rcu_read_lock(); | ||
225 | read_lock(&tasklist_lock); | 226 | read_lock(&tasklist_lock); |
226 | switch (which) { | 227 | switch (which) { |
227 | case PRIO_PROCESS: | 228 | case PRIO_PROCESS: |
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
267 | } | 268 | } |
268 | out_unlock: | 269 | out_unlock: |
269 | read_unlock(&tasklist_lock); | 270 | read_unlock(&tasklist_lock); |
271 | rcu_read_unlock(); | ||
270 | 272 | ||
271 | return retval; | 273 | return retval; |
272 | } | 274 | } |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7faaa32fbf4f..e2ab064c6d41 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts) | |||
880 | 880 | ||
881 | set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); | 881 | set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); |
882 | } | 882 | } |
883 | EXPORT_SYMBOL_GPL(getboottime); | ||
883 | 884 | ||
884 | /** | 885 | /** |
885 | * monotonic_to_bootbased - Convert the monotonic time to boot based. | 886 | * monotonic_to_bootbased - Convert the monotonic time to boot based. |
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts) | |||
889 | { | 890 | { |
890 | *ts = timespec_add_safe(*ts, total_sleep_time); | 891 | *ts = timespec_add_safe(*ts, total_sleep_time); |
891 | } | 892 | } |
893 | EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | ||
892 | 894 | ||
893 | unsigned long get_seconds(void) | 895 | unsigned long get_seconds(void) |
894 | { | 896 | { |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cd9ecd89ec77..d00c6fe23f54 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -51,7 +51,9 @@ endif | |||
51 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o | 51 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o |
52 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o | 52 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o |
53 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 53 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 54 | ifeq ($(CONFIG_PERF_EVENTS),y) |
55 | obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o | ||
56 | endif | ||
55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 57 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 58 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
57 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o | 59 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1e6640f80454..1904797f4a8a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
24 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
25 | #include <linux/kprobes.h> | ||
26 | #include <linux/ftrace.h> | 25 | #include <linux/ftrace.h> |
27 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
28 | #include <linux/ctype.h> | 27 | #include <linux/ctype.h> |
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records; | |||
898 | } \ | 897 | } \ |
899 | } | 898 | } |
900 | 899 | ||
901 | #ifdef CONFIG_KPROBES | ||
902 | |||
903 | static int frozen_record_count; | ||
904 | |||
905 | static inline void freeze_record(struct dyn_ftrace *rec) | ||
906 | { | ||
907 | if (!(rec->flags & FTRACE_FL_FROZEN)) { | ||
908 | rec->flags |= FTRACE_FL_FROZEN; | ||
909 | frozen_record_count++; | ||
910 | } | ||
911 | } | ||
912 | |||
913 | static inline void unfreeze_record(struct dyn_ftrace *rec) | ||
914 | { | ||
915 | if (rec->flags & FTRACE_FL_FROZEN) { | ||
916 | rec->flags &= ~FTRACE_FL_FROZEN; | ||
917 | frozen_record_count--; | ||
918 | } | ||
919 | } | ||
920 | |||
921 | static inline int record_frozen(struct dyn_ftrace *rec) | ||
922 | { | ||
923 | return rec->flags & FTRACE_FL_FROZEN; | ||
924 | } | ||
925 | #else | ||
926 | # define freeze_record(rec) ({ 0; }) | ||
927 | # define unfreeze_record(rec) ({ 0; }) | ||
928 | # define record_frozen(rec) ({ 0; }) | ||
929 | #endif /* CONFIG_KPROBES */ | ||
930 | |||
931 | static void ftrace_free_rec(struct dyn_ftrace *rec) | 900 | static void ftrace_free_rec(struct dyn_ftrace *rec) |
932 | { | 901 | { |
933 | rec->freelist = ftrace_free_records; | 902 | rec->freelist = ftrace_free_records; |
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip) | |||
1025 | } | 994 | } |
1026 | 995 | ||
1027 | 996 | ||
997 | /* Return 1 if the address range is reserved for ftrace */ | ||
998 | int ftrace_text_reserved(void *start, void *end) | ||
999 | { | ||
1000 | struct dyn_ftrace *rec; | ||
1001 | struct ftrace_page *pg; | ||
1002 | |||
1003 | do_for_each_ftrace_rec(pg, rec) { | ||
1004 | if (rec->ip <= (unsigned long)end && | ||
1005 | rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) | ||
1006 | return 1; | ||
1007 | } while_for_each_ftrace_rec(); | ||
1008 | return 0; | ||
1009 | } | ||
1010 | |||
1011 | |||
1028 | static int | 1012 | static int |
1029 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) | 1013 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) |
1030 | { | 1014 | { |
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable) | |||
1076 | !(rec->flags & FTRACE_FL_CONVERTED)) | 1060 | !(rec->flags & FTRACE_FL_CONVERTED)) |
1077 | continue; | 1061 | continue; |
1078 | 1062 | ||
1079 | /* ignore updates to this record's mcount site */ | ||
1080 | if (get_kprobe((void *)rec->ip)) { | ||
1081 | freeze_record(rec); | ||
1082 | continue; | ||
1083 | } else { | ||
1084 | unfreeze_record(rec); | ||
1085 | } | ||
1086 | |||
1087 | failed = __ftrace_replace_code(rec, enable); | 1063 | failed = __ftrace_replace_code(rec, enable); |
1088 | if (failed) { | 1064 | if (failed) { |
1089 | rec->flags |= FTRACE_FL_FAILED; | 1065 | rec->flags |= FTRACE_FL_FAILED; |
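
ftrace_text_reserved() walks every dyn_ftrace record and reports whether the given range overlaps an mcount call site, which lets callers such as kprobes refuse to touch instructions that ftrace owns. The core of it is a standard interval-overlap test; here is a standalone version with a hypothetical record table and MCOUNT_INSN_SIZE assumed to be 5 bytes as on x86:

#include <stdio.h>

#define MCOUNT_INSN_SIZE 5  /* assumption: x86 call instruction length */

/* Hypothetical table of recorded mcount call-site addresses. */
static const unsigned long mcount_sites[] = { 0x1000, 0x1040, 0x2000 };

/* Return 1 if [start, end] overlaps any recorded mcount instruction. */
static int text_reserved(unsigned long start, unsigned long end)
{
    size_t i;

    for (i = 0; i < sizeof(mcount_sites) / sizeof(mcount_sites[0]); i++) {
        unsigned long ip = mcount_sites[i];

        if (ip <= end && ip + MCOUNT_INSN_SIZE > start)
            return 1;
    }
    return 0;
}

int main(void)
{
    printf("0x1002 reserved? %d\n", text_reserved(0x1002, 0x1002));
    printf("0x3000 reserved? %d\n", text_reserved(0x3000, 0x3004));
    return 0;
}
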
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 9e25573242cf..f0d693005075 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
@@ -6,14 +6,12 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/kprobes.h> | ||
9 | #include "trace.h" | 10 | #include "trace.h" |
10 | 11 | ||
11 | 12 | ||
12 | char *perf_trace_buf; | 13 | static char *perf_trace_buf; |
13 | EXPORT_SYMBOL_GPL(perf_trace_buf); | 14 | static char *perf_trace_buf_nmi; |
14 | |||
15 | char *perf_trace_buf_nmi; | ||
16 | EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); | ||
17 | 15 | ||
18 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; | 16 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; |
19 | 17 | ||
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) | |||
120 | } | 118 | } |
121 | mutex_unlock(&event_mutex); | 119 | mutex_unlock(&event_mutex); |
122 | } | 120 | } |
121 | |||
122 | __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, | ||
123 | int *rctxp, unsigned long *irq_flags) | ||
124 | { | ||
125 | struct trace_entry *entry; | ||
126 | char *trace_buf, *raw_data; | ||
127 | int pc, cpu; | ||
128 | |||
129 | pc = preempt_count(); | ||
130 | |||
131 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
132 | local_irq_save(*irq_flags); | ||
133 | |||
134 | *rctxp = perf_swevent_get_recursion_context(); | ||
135 | if (*rctxp < 0) | ||
136 | goto err_recursion; | ||
137 | |||
138 | cpu = smp_processor_id(); | ||
139 | |||
140 | if (in_nmi()) | ||
141 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
142 | else | ||
143 | trace_buf = rcu_dereference(perf_trace_buf); | ||
144 | |||
145 | if (!trace_buf) | ||
146 | goto err; | ||
147 | |||
148 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
149 | |||
150 | /* zero the dead bytes from align to not leak stack to user */ | ||
151 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
152 | |||
153 | entry = (struct trace_entry *)raw_data; | ||
154 | tracing_generic_entry_update(entry, *irq_flags, pc); | ||
155 | entry->type = type; | ||
156 | |||
157 | return raw_data; | ||
158 | err: | ||
159 | perf_swevent_put_recursion_context(*rctxp); | ||
160 | err_recursion: | ||
161 | local_irq_restore(*irq_flags); | ||
162 | return NULL; | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); | ||
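
The new ftrace_perf_buf_prepare() helper centralizes what each profile handler used to do by hand: round the record size so that it plus the u32 perf header stays u64-aligned, and clear the trailing pad bytes so no stack garbage can reach user space. The arithmetic can be checked in isolation; ALIGN() is reimplemented below and an ordinary array stands in for the per-cpu buffer.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same rounding the kernel's ALIGN() macro performs for a power-of-two a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
    char buf[128];
    size_t __size = 21;     /* raw record size: entry header + args */
    size_t size;

    /* Keep record size + the u32 perf header a multiple of u64. */
    size = ALIGN(__size + sizeof(uint32_t), sizeof(uint64_t));
    size -= sizeof(uint32_t);

    memset(buf, 0xAA, sizeof(buf)); /* pretend the buffer holds junk */

    /*
     * Zero the dead bytes introduced by the alignment so they cannot
     * leak to user space (the kernel zeroes the trailing u64 instead).
     */
    memset(buf + __size, 0, size - __size);

    printf("__size=%zu, padded size=%zu, pad bytes=%zu\n",
           __size, size, size - __size);
    return 0;
}

With __size = 21 the padded size comes out to 28, leaving 7 pad bytes that must be cleared before the record is handed to perf.
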
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e42af9aad69f..4615f62a04f1 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -1371,7 +1371,7 @@ out_unlock: | |||
1371 | return err; | 1371 | return err; |
1372 | } | 1372 | } |
1373 | 1373 | ||
1374 | #ifdef CONFIG_EVENT_PROFILE | 1374 | #ifdef CONFIG_PERF_EVENTS |
1375 | 1375 | ||
1376 | void ftrace_profile_free_filter(struct perf_event *event) | 1376 | void ftrace_profile_free_filter(struct perf_event *event) |
1377 | { | 1377 | { |
@@ -1439,5 +1439,5 @@ out_unlock: | |||
1439 | return err; | 1439 | return err; |
1440 | } | 1440 | } |
1441 | 1441 | ||
1442 | #endif /* CONFIG_EVENT_PROFILE */ | 1442 | #endif /* CONFIG_PERF_EVENTS */ |
1443 | 1443 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..356c10227c98 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) | |||
91 | return retval; | 91 | return retval; |
92 | } | 92 | } |
93 | 93 | ||
94 | static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) | ||
95 | { | ||
96 | return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); | ||
97 | } | ||
98 | |||
99 | static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, | 94 | static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, |
100 | void *dummy) | 95 | void *dummy) |
101 | { | 96 | { |
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) | |||
231 | { | 226 | { |
232 | int ret = -EINVAL; | 227 | int ret = -EINVAL; |
233 | 228 | ||
234 | if (ff->func == fetch_argument) | 229 | if (ff->func == fetch_register) { |
235 | ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); | ||
236 | else if (ff->func == fetch_register) { | ||
237 | const char *name; | 230 | const char *name; |
238 | name = regs_query_register_name((unsigned int)((long)ff->data)); | 231 | name = regs_query_register_name((unsigned int)((long)ff->data)); |
239 | ret = snprintf(buf, n, "%%%s", name); | 232 | ret = snprintf(buf, n, "%%%s", name); |
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) | |||
489 | } | 482 | } |
490 | } else | 483 | } else |
491 | ret = -EINVAL; | 484 | ret = -EINVAL; |
492 | } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { | ||
493 | ret = strict_strtoul(arg + 3, 10, ¶m); | ||
494 | if (ret || param > PARAM_MAX_ARGS) | ||
495 | ret = -EINVAL; | ||
496 | else { | ||
497 | ff->func = fetch_argument; | ||
498 | ff->data = (void *)param; | ||
499 | } | ||
500 | } else | 485 | } else |
501 | ret = -EINVAL; | 486 | ret = -EINVAL; |
502 | return ret; | 487 | return ret; |
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv) | |||
611 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] | 596 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] |
612 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] | 597 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] |
613 | * Fetch args: | 598 | * Fetch args: |
614 | * $argN : fetch Nth of function argument. (N:0-) | ||
615 | * $retval : fetch return value | 599 | * $retval : fetch return value |
616 | * $stack : fetch stack address | 600 | * $stack : fetch stack address |
617 | * $stackN : fetch Nth of stack (N:0-) | 601 | * $stackN : fetch Nth of stack (N:0-) |
@@ -689,7 +673,7 @@ static int create_trace_probe(int argc, char **argv) | |||
689 | return -EINVAL; | 673 | return -EINVAL; |
690 | } | 674 | } |
691 | /* an address specified */ | 675 | /* an address specified */ |
692 | ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); | 676 | ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); |
693 | if (ret) { | 677 | if (ret) { |
694 | pr_info("Failed to parse address.\n"); | 678 | pr_info("Failed to parse address.\n"); |
695 | return ret; | 679 | return ret; |
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = { | |||
958 | }; | 942 | }; |
959 | 943 | ||
960 | /* Kprobe handler */ | 944 | /* Kprobe handler */ |
961 | static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | 945 | static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) |
962 | { | 946 | { |
963 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 947 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
964 | struct kprobe_trace_entry *entry; | 948 | struct kprobe_trace_entry *entry; |
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | |||
978 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, | 962 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, |
979 | irq_flags, pc); | 963 | irq_flags, pc); |
980 | if (!event) | 964 | if (!event) |
981 | return 0; | 965 | return; |
982 | 966 | ||
983 | entry = ring_buffer_event_data(event); | 967 | entry = ring_buffer_event_data(event); |
984 | entry->nargs = tp->nr_args; | 968 | entry->nargs = tp->nr_args; |
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | |||
988 | 972 | ||
989 | if (!filter_current_check_discard(buffer, call, entry, event)) | 973 | if (!filter_current_check_discard(buffer, call, entry, event)) |
990 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 974 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); |
991 | return 0; | ||
992 | } | 975 | } |
993 | 976 | ||
994 | /* Kretprobe handler */ | 977 | /* Kretprobe handler */ |
995 | static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | 978 | static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, |
996 | struct pt_regs *regs) | 979 | struct pt_regs *regs) |
997 | { | 980 | { |
998 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 981 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | |||
1011 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, | 994 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, |
1012 | irq_flags, pc); | 995 | irq_flags, pc); |
1013 | if (!event) | 996 | if (!event) |
1014 | return 0; | 997 | return; |
1015 | 998 | ||
1016 | entry = ring_buffer_event_data(event); | 999 | entry = ring_buffer_event_data(event); |
1017 | entry->nargs = tp->nr_args; | 1000 | entry->nargs = tp->nr_args; |
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | |||
1022 | 1005 | ||
1023 | if (!filter_current_check_discard(buffer, call, entry, event)) | 1006 | if (!filter_current_check_discard(buffer, call, entry, event)) |
1024 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 1007 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); |
1025 | |||
1026 | return 0; | ||
1027 | } | 1008 | } |
1028 | 1009 | ||
1029 | /* Event entry printers */ | 1010 | /* Event entry printers */ |
@@ -1250,137 +1231,67 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, | |||
1250 | ", REC->" FIELD_STRING_RETIP); | 1231 | ", REC->" FIELD_STRING_RETIP); |
1251 | } | 1232 | } |
1252 | 1233 | ||
1253 | #ifdef CONFIG_EVENT_PROFILE | 1234 | #ifdef CONFIG_PERF_EVENTS |
1254 | 1235 | ||
1255 | /* Kprobe profile handler */ | 1236 | /* Kprobe profile handler */ |
1256 | static __kprobes int kprobe_profile_func(struct kprobe *kp, | 1237 | static __kprobes void kprobe_profile_func(struct kprobe *kp, |
1257 | struct pt_regs *regs) | 1238 | struct pt_regs *regs) |
1258 | { | 1239 | { |
1259 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1240 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1260 | struct ftrace_event_call *call = &tp->call; | 1241 | struct ftrace_event_call *call = &tp->call; |
1261 | struct kprobe_trace_entry *entry; | 1242 | struct kprobe_trace_entry *entry; |
1262 | struct trace_entry *ent; | 1243 | int size, __size, i; |
1263 | int size, __size, i, pc, __cpu; | ||
1264 | unsigned long irq_flags; | 1244 | unsigned long irq_flags; |
1265 | char *trace_buf; | ||
1266 | char *raw_data; | ||
1267 | int rctx; | 1245 | int rctx; |
1268 | 1246 | ||
1269 | pc = preempt_count(); | ||
1270 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); | 1247 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); |
1271 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1248 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1272 | size -= sizeof(u32); | 1249 | size -= sizeof(u32); |
1273 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | 1250 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
1274 | "profile buffer not large enough")) | 1251 | "profile buffer not large enough")) |
1275 | return 0; | 1252 | return; |
1276 | |||
1277 | /* | ||
1278 | * Protect the non nmi buffer | ||
1279 | * This also protects the rcu read side | ||
1280 | */ | ||
1281 | local_irq_save(irq_flags); | ||
1282 | 1253 | ||
1283 | rctx = perf_swevent_get_recursion_context(); | 1254 | entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); |
1284 | if (rctx < 0) | 1255 | if (!entry) |
1285 | goto end_recursion; | 1256 | return; |
1286 | |||
1287 | __cpu = smp_processor_id(); | ||
1288 | |||
1289 | if (in_nmi()) | ||
1290 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1291 | else | ||
1292 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1293 | |||
1294 | if (!trace_buf) | ||
1295 | goto end; | ||
1296 | |||
1297 | raw_data = per_cpu_ptr(trace_buf, __cpu); | ||
1298 | |||
1299 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | ||
1300 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
1301 | entry = (struct kprobe_trace_entry *)raw_data; | ||
1302 | ent = &entry->ent; | ||
1303 | 1257 | ||
1304 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1305 | ent->type = call->id; | ||
1306 | entry->nargs = tp->nr_args; | 1258 | entry->nargs = tp->nr_args; |
1307 | entry->ip = (unsigned long)kp->addr; | 1259 | entry->ip = (unsigned long)kp->addr; |
1308 | for (i = 0; i < tp->nr_args; i++) | 1260 | for (i = 0; i < tp->nr_args; i++) |
1309 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1261 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1310 | perf_tp_event(call->id, entry->ip, 1, entry, size); | ||
1311 | 1262 | ||
1312 | end: | 1263 | ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); |
1313 | perf_swevent_put_recursion_context(rctx); | ||
1314 | end_recursion: | ||
1315 | local_irq_restore(irq_flags); | ||
1316 | |||
1317 | return 0; | ||
1318 | } | 1264 | } |
1319 | 1265 | ||
1320 | /* Kretprobe profile handler */ | 1266 | /* Kretprobe profile handler */ |
1321 | static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | 1267 | static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, |
1322 | struct pt_regs *regs) | 1268 | struct pt_regs *regs) |
1323 | { | 1269 | { |
1324 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1270 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1325 | struct ftrace_event_call *call = &tp->call; | 1271 | struct ftrace_event_call *call = &tp->call; |
1326 | struct kretprobe_trace_entry *entry; | 1272 | struct kretprobe_trace_entry *entry; |
1327 | struct trace_entry *ent; | 1273 | int size, __size, i; |
1328 | int size, __size, i, pc, __cpu; | ||
1329 | unsigned long irq_flags; | 1274 | unsigned long irq_flags; |
1330 | char *trace_buf; | ||
1331 | char *raw_data; | ||
1332 | int rctx; | 1275 | int rctx; |
1333 | 1276 | ||
1334 | pc = preempt_count(); | ||
1335 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); | 1277 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); |
1336 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1278 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1337 | size -= sizeof(u32); | 1279 | size -= sizeof(u32); |
1338 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | 1280 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
1339 | "profile buffer not large enough")) | 1281 | "profile buffer not large enough")) |
1340 | return 0; | 1282 | return; |
1341 | |||
1342 | /* | ||
1343 | * Protect the non nmi buffer | ||
1344 | * This also protects the rcu read side | ||
1345 | */ | ||
1346 | local_irq_save(irq_flags); | ||
1347 | |||
1348 | rctx = perf_swevent_get_recursion_context(); | ||
1349 | if (rctx < 0) | ||
1350 | goto end_recursion; | ||
1351 | |||
1352 | __cpu = smp_processor_id(); | ||
1353 | |||
1354 | if (in_nmi()) | ||
1355 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1356 | else | ||
1357 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1358 | |||
1359 | if (!trace_buf) | ||
1360 | goto end; | ||
1361 | |||
1362 | raw_data = per_cpu_ptr(trace_buf, __cpu); | ||
1363 | 1283 | ||
1364 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | 1284 | entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); |
1365 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 1285 | if (!entry) |
1366 | entry = (struct kretprobe_trace_entry *)raw_data; | 1286 | return; |
1367 | ent = &entry->ent; | ||
1368 | 1287 | ||
1369 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1370 | ent->type = call->id; | ||
1371 | entry->nargs = tp->nr_args; | 1288 | entry->nargs = tp->nr_args; |
1372 | entry->func = (unsigned long)tp->rp.kp.addr; | 1289 | entry->func = (unsigned long)tp->rp.kp.addr; |
1373 | entry->ret_ip = (unsigned long)ri->ret_addr; | 1290 | entry->ret_ip = (unsigned long)ri->ret_addr; |
1374 | for (i = 0; i < tp->nr_args; i++) | 1291 | for (i = 0; i < tp->nr_args; i++) |
1375 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1292 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1376 | perf_tp_event(call->id, entry->ret_ip, 1, entry, size); | ||
1377 | |||
1378 | end: | ||
1379 | perf_swevent_put_recursion_context(rctx); | ||
1380 | end_recursion: | ||
1381 | local_irq_restore(irq_flags); | ||
1382 | 1293 | ||
1383 | return 0; | 1294 | ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); |
1384 | } | 1295 | } |
1385 | 1296 | ||
1386 | static int probe_profile_enable(struct ftrace_event_call *call) | 1297 | static int probe_profile_enable(struct ftrace_event_call *call) |
@@ -1408,7 +1319,7 @@ static void probe_profile_disable(struct ftrace_event_call *call) | |||
1408 | disable_kprobe(&tp->rp.kp); | 1319 | disable_kprobe(&tp->rp.kp); |
1409 | } | 1320 | } |
1410 | } | 1321 | } |
1411 | #endif /* CONFIG_EVENT_PROFILE */ | 1322 | #endif /* CONFIG_PERF_EVENTS */ |
1412 | 1323 | ||
1413 | 1324 | ||
1414 | static __kprobes | 1325 | static __kprobes |
@@ -1418,10 +1329,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) | |||
1418 | 1329 | ||
1419 | if (tp->flags & TP_FLAG_TRACE) | 1330 | if (tp->flags & TP_FLAG_TRACE) |
1420 | kprobe_trace_func(kp, regs); | 1331 | kprobe_trace_func(kp, regs); |
1421 | #ifdef CONFIG_EVENT_PROFILE | 1332 | #ifdef CONFIG_PERF_EVENTS |
1422 | if (tp->flags & TP_FLAG_PROFILE) | 1333 | if (tp->flags & TP_FLAG_PROFILE) |
1423 | kprobe_profile_func(kp, regs); | 1334 | kprobe_profile_func(kp, regs); |
1424 | #endif /* CONFIG_EVENT_PROFILE */ | 1335 | #endif |
1425 | return 0; /* We don't tweak kernel, so just return 0 */ | 1336 | return 0; /* We don't tweak kernel, so just return 0 */ |
1426 | } | 1337 | } |
1427 | 1338 | ||
@@ -1432,10 +1343,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) | |||
1432 | 1343 | ||
1433 | if (tp->flags & TP_FLAG_TRACE) | 1344 | if (tp->flags & TP_FLAG_TRACE) |
1434 | kretprobe_trace_func(ri, regs); | 1345 | kretprobe_trace_func(ri, regs); |
1435 | #ifdef CONFIG_EVENT_PROFILE | 1346 | #ifdef CONFIG_PERF_EVENTS |
1436 | if (tp->flags & TP_FLAG_PROFILE) | 1347 | if (tp->flags & TP_FLAG_PROFILE) |
1437 | kretprobe_profile_func(ri, regs); | 1348 | kretprobe_profile_func(ri, regs); |
1438 | #endif /* CONFIG_EVENT_PROFILE */ | 1349 | #endif |
1439 | return 0; /* We don't tweek kernel, so just return 0 */ | 1350 | return 0; /* We don't tweek kernel, so just return 0 */ |
1440 | } | 1351 | } |
1441 | 1352 | ||
@@ -1464,7 +1375,7 @@ static int register_probe_event(struct trace_probe *tp) | |||
1464 | call->regfunc = probe_event_enable; | 1375 | call->regfunc = probe_event_enable; |
1465 | call->unregfunc = probe_event_disable; | 1376 | call->unregfunc = probe_event_disable; |
1466 | 1377 | ||
1467 | #ifdef CONFIG_EVENT_PROFILE | 1378 | #ifdef CONFIG_PERF_EVENTS |
1468 | call->profile_enable = probe_profile_enable; | 1379 | call->profile_enable = probe_profile_enable; |
1469 | call->profile_disable = probe_profile_disable; | 1380 | call->profile_disable = probe_profile_disable; |
1470 | #endif | 1381 | #endif |
@@ -1523,28 +1434,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3, | |||
1523 | 1434 | ||
1524 | static __init int kprobe_trace_self_tests_init(void) | 1435 | static __init int kprobe_trace_self_tests_init(void) |
1525 | { | 1436 | { |
1526 | int ret; | 1437 | int ret, warn = 0; |
1527 | int (*target)(int, int, int, int, int, int); | 1438 | int (*target)(int, int, int, int, int, int); |
1439 | struct trace_probe *tp; | ||
1528 | 1440 | ||
1529 | target = kprobe_trace_selftest_target; | 1441 | target = kprobe_trace_selftest_target; |
1530 | 1442 | ||
1531 | pr_info("Testing kprobe tracing: "); | 1443 | pr_info("Testing kprobe tracing: "); |
1532 | 1444 | ||
1533 | ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " | 1445 | ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " |
1534 | "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); | 1446 | "$stack $stack0 +0($stack)"); |
1535 | if (WARN_ON_ONCE(ret)) | 1447 | if (WARN_ON_ONCE(ret)) { |
1536 | pr_warning("error enabling function entry\n"); | 1448 | pr_warning("error on probing function entry.\n"); |
1449 | warn++; | ||
1450 | } else { | ||
1451 | /* Enable trace point */ | ||
1452 | tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); | ||
1453 | if (WARN_ON_ONCE(tp == NULL)) { | ||
1454 | pr_warning("error on getting new probe.\n"); | ||
1455 | warn++; | ||
1456 | } else | ||
1457 | probe_event_enable(&tp->call); | ||
1458 | } | ||
1537 | 1459 | ||
1538 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " | 1460 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " |
1539 | "$retval"); | 1461 | "$retval"); |
1540 | if (WARN_ON_ONCE(ret)) | 1462 | if (WARN_ON_ONCE(ret)) { |
1541 | pr_warning("error enabling function return\n"); | 1463 | pr_warning("error on probing function return.\n"); |
1464 | warn++; | ||
1465 | } else { | ||
1466 | /* Enable trace point */ | ||
1467 | tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); | ||
1468 | if (WARN_ON_ONCE(tp == NULL)) { | ||
1469 | pr_warning("error on getting new probe.\n"); | ||
1470 | warn++; | ||
1471 | } else | ||
1472 | probe_event_enable(&tp->call); | ||
1473 | } | ||
1474 | |||
1475 | if (warn) | ||
1476 | goto end; | ||
1542 | 1477 | ||
1543 | ret = target(1, 2, 3, 4, 5, 6); | 1478 | ret = target(1, 2, 3, 4, 5, 6); |
1544 | 1479 | ||
1545 | cleanup_all_probes(); | 1480 | ret = command_trace_probe("-:testprobe"); |
1481 | if (WARN_ON_ONCE(ret)) { | ||
1482 | pr_warning("error on deleting a probe.\n"); | ||
1483 | warn++; | ||
1484 | } | ||
1485 | |||
1486 | ret = command_trace_probe("-:testprobe2"); | ||
1487 | if (WARN_ON_ONCE(ret)) { | ||
1488 | pr_warning("error on deleting a probe.\n"); | ||
1489 | warn++; | ||
1490 | } | ||
1546 | 1491 | ||
1547 | pr_cont("OK\n"); | 1492 | end: |
1493 | cleanup_all_probes(); | ||
1494 | if (warn) | ||
1495 | pr_cont("NG: Some tests are failed. Please check them.\n"); | ||
1496 | else | ||
1497 | pr_cont("OK\n"); | ||
1548 | return 0; | 1498 | return 0; |
1549 | } | 1499 | } |
1550 | 1500 | ||
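
The reworked self-test counts failures instead of only warning: it registers and enables both probes, exercises the target, deletes the probes by name with the "-:EVENT" syntax, and prints OK only if every step succeeded. The control flow reduces to a warn counter plus one cleanup path, roughly as in this standalone sketch with placeholder step functions:

#include <stdio.h>

/* Placeholder setup/exercise/teardown steps; nonzero means failure. */
static int setup_entry_probe(void)  { return 0; }
static int setup_return_probe(void) { return 0; }
static int run_target(void)         { return 0; }
static int delete_probe(const char *name) { printf("delete %s\n", name); return 0; }
static void cleanup_all(void)       { puts("cleanup_all_probes()"); }

int main(void)
{
    int warn = 0;

    if (setup_entry_probe()) {
        fprintf(stderr, "error on probing function entry\n");
        warn++;
    }
    if (setup_return_probe()) {
        fprintf(stderr, "error on probing function return\n");
        warn++;
    }
    if (warn)
        goto end;

    run_target();

    if (delete_probe("testprobe"))
        warn++;
    if (delete_probe("testprobe2"))
        warn++;
end:
    cleanup_all();
    puts(warn ? "NG: some tests failed" : "OK");
    return 0;
}
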
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 678a5120ee30..f4bc9b27de5f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
157 | unsigned long val, flags; | 157 | unsigned long val, flags; |
158 | char buf[64]; | 158 | char buf[64]; |
159 | int ret; | 159 | int ret; |
160 | int cpu; | ||
160 | 161 | ||
161 | if (count >= sizeof(buf)) | 162 | if (count >= sizeof(buf)) |
162 | return -EINVAL; | 163 | return -EINVAL; |
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
171 | return ret; | 172 | return ret; |
172 | 173 | ||
173 | local_irq_save(flags); | 174 | local_irq_save(flags); |
175 | |||
176 | /* | ||
177 | * In case we trace inside arch_spin_lock() or after (NMI), | ||
178 | * we will cause circular lock, so we also need to increase | ||
179 | * the percpu trace_active here. | ||
180 | */ | ||
181 | cpu = smp_processor_id(); | ||
182 | per_cpu(trace_active, cpu)++; | ||
183 | |||
174 | arch_spin_lock(&max_stack_lock); | 184 | arch_spin_lock(&max_stack_lock); |
175 | *ptr = val; | 185 | *ptr = val; |
176 | arch_spin_unlock(&max_stack_lock); | 186 | arch_spin_unlock(&max_stack_lock); |
187 | |||
188 | per_cpu(trace_active, cpu)--; | ||
177 | local_irq_restore(flags); | 189 | local_irq_restore(flags); |
178 | 190 | ||
179 | return count; | 191 | return count; |
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
206 | 218 | ||
207 | static void *t_start(struct seq_file *m, loff_t *pos) | 219 | static void *t_start(struct seq_file *m, loff_t *pos) |
208 | { | 220 | { |
221 | int cpu; | ||
222 | |||
209 | local_irq_disable(); | 223 | local_irq_disable(); |
224 | |||
225 | cpu = smp_processor_id(); | ||
226 | per_cpu(trace_active, cpu)++; | ||
227 | |||
210 | arch_spin_lock(&max_stack_lock); | 228 | arch_spin_lock(&max_stack_lock); |
211 | 229 | ||
212 | if (*pos == 0) | 230 | if (*pos == 0) |
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
217 | 235 | ||
218 | static void t_stop(struct seq_file *m, void *p) | 236 | static void t_stop(struct seq_file *m, void *p) |
219 | { | 237 | { |
238 | int cpu; | ||
239 | |||
220 | arch_spin_unlock(&max_stack_lock); | 240 | arch_spin_unlock(&max_stack_lock); |
241 | |||
242 | cpu = smp_processor_id(); | ||
243 | per_cpu(trace_active, cpu)--; | ||
244 | |||
221 | local_irq_enable(); | 245 | local_irq_enable(); |
222 | } | 246 | } |
223 | 247 | ||
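
Both trace_stack.c hunks bump the per-cpu trace_active counter before taking max_stack_lock: if the stack tracer fires again while the lock is held (for instance from an NMI), it sees trace_active set and backs off instead of deadlocking on the same lock. The sketch below shows the shape of such a re-entrancy guard, with a thread-local counter standing in for the per-cpu variable and a pthread mutex for the arch spinlock.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t max_stack_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread int trace_active;   /* per-cpu counter in the kernel */

/* The tracer callback: must never try to take max_stack_lock twice. */
static void stack_tracer_hook(void)
{
    if (trace_active)
        return;     /* already inside the locked region: bail out */

    trace_active++;
    pthread_mutex_lock(&max_stack_lock);
    /* ... record a new maximum stack trace ... */
    pthread_mutex_unlock(&max_stack_lock);
    trace_active--;
}

/* A writer such as stack_max_size_write() uses the same guard. */
static void set_max_size(long val)
{
    trace_active++;
    pthread_mutex_lock(&max_stack_lock);
    printf("max stack size set to %ld\n", val);
    /* any hook that fires here sees trace_active != 0 and returns */
    pthread_mutex_unlock(&max_stack_lock);
    trace_active--;
}

int main(void)
{
    set_max_size(4096);
    stack_tracer_hook();
    return 0;
}
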
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd2..4e332b9e449c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void) | |||
421 | } | 421 | } |
422 | core_initcall(init_ftrace_syscalls); | 422 | core_initcall(init_ftrace_syscalls); |
423 | 423 | ||
424 | #ifdef CONFIG_EVENT_PROFILE | 424 | #ifdef CONFIG_PERF_EVENTS |
425 | 425 | ||
426 | static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); | 426 | static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); |
427 | static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); | 427 | static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); |
@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
433 | struct syscall_metadata *sys_data; | 433 | struct syscall_metadata *sys_data; |
434 | struct syscall_trace_enter *rec; | 434 | struct syscall_trace_enter *rec; |
435 | unsigned long flags; | 435 | unsigned long flags; |
436 | char *trace_buf; | ||
437 | char *raw_data; | ||
438 | int syscall_nr; | 436 | int syscall_nr; |
439 | int rctx; | 437 | int rctx; |
440 | int size; | 438 | int size; |
441 | int cpu; | ||
442 | 439 | ||
443 | syscall_nr = syscall_get_nr(current, regs); | 440 | syscall_nr = syscall_get_nr(current, regs); |
444 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) | 441 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) |
@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
457 | "profile buffer not large enough")) | 454 | "profile buffer not large enough")) |
458 | return; | 455 | return; |
459 | 456 | ||
460 | /* Protect the per cpu buffer, begin the rcu read side */ | 457 | rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, |
461 | local_irq_save(flags); | 458 | sys_data->enter_event->id, &rctx, &flags); |
462 | 459 | if (!rec) | |
463 | rctx = perf_swevent_get_recursion_context(); | 460 | return; |
464 | if (rctx < 0) | ||
465 | goto end_recursion; | ||
466 | |||
467 | cpu = smp_processor_id(); | ||
468 | |||
469 | trace_buf = rcu_dereference(perf_trace_buf); | ||
470 | |||
471 | if (!trace_buf) | ||
472 | goto end; | ||
473 | |||
474 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
475 | |||
476 | /* zero the dead bytes from align to not leak stack to user */ | ||
477 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
478 | 461 | ||
479 | rec = (struct syscall_trace_enter *) raw_data; | ||
480 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
481 | rec->ent.type = sys_data->enter_event->id; | ||
482 | rec->nr = syscall_nr; | 462 | rec->nr = syscall_nr; |
483 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 463 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
484 | (unsigned long *)&rec->args); | 464 | (unsigned long *)&rec->args); |
485 | perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); | 465 | ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); |
486 | |||
487 | end: | ||
488 | perf_swevent_put_recursion_context(rctx); | ||
489 | end_recursion: | ||
490 | local_irq_restore(flags); | ||
491 | } | 466 | } |
492 | 467 | ||
493 | int prof_sysenter_enable(struct ftrace_event_call *call) | 468 | int prof_sysenter_enable(struct ftrace_event_call *call) |
@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
531 | struct syscall_trace_exit *rec; | 506 | struct syscall_trace_exit *rec; |
532 | unsigned long flags; | 507 | unsigned long flags; |
533 | int syscall_nr; | 508 | int syscall_nr; |
534 | char *trace_buf; | ||
535 | char *raw_data; | ||
536 | int rctx; | 509 | int rctx; |
537 | int size; | 510 | int size; |
538 | int cpu; | ||
539 | 511 | ||
540 | syscall_nr = syscall_get_nr(current, regs); | 512 | syscall_nr = syscall_get_nr(current, regs); |
541 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) | 513 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) |
@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
557 | "exit event has grown above profile buffer size")) | 529 | "exit event has grown above profile buffer size")) |
558 | return; | 530 | return; |
559 | 531 | ||
560 | /* Protect the per cpu buffer, begin the rcu read side */ | 532 | rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, |
561 | local_irq_save(flags); | 533 | sys_data->exit_event->id, &rctx, &flags); |
562 | 534 | if (!rec) | |
563 | rctx = perf_swevent_get_recursion_context(); | 535 | return; |
564 | if (rctx < 0) | ||
565 | goto end_recursion; | ||
566 | |||
567 | cpu = smp_processor_id(); | ||
568 | |||
569 | trace_buf = rcu_dereference(perf_trace_buf); | ||
570 | |||
571 | if (!trace_buf) | ||
572 | goto end; | ||
573 | |||
574 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
575 | |||
576 | /* zero the dead bytes from align to not leak stack to user */ | ||
577 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
578 | |||
579 | rec = (struct syscall_trace_exit *)raw_data; | ||
580 | 536 | ||
581 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
582 | rec->ent.type = sys_data->exit_event->id; | ||
583 | rec->nr = syscall_nr; | 537 | rec->nr = syscall_nr; |
584 | rec->ret = syscall_get_return_value(current, regs); | 538 | rec->ret = syscall_get_return_value(current, regs); |
585 | 539 | ||
586 | perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); | 540 | ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); |
587 | |||
588 | end: | ||
589 | perf_swevent_put_recursion_context(rctx); | ||
590 | end_recursion: | ||
591 | local_irq_restore(flags); | ||
592 | } | 541 | } |
593 | 542 | ||
594 | int prof_sysexit_enable(struct ftrace_event_call *call) | 543 | int prof_sysexit_enable(struct ftrace_event_call *call) |
@@ -626,6 +575,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call) | |||
626 | mutex_unlock(&syscall_trace_lock); | 575 | mutex_unlock(&syscall_trace_lock); |
627 | } | 576 | } |
628 | 577 | ||
629 | #endif | 578 | #endif /* CONFIG_PERF_EVENTS */ |
630 | |||
631 | 579 | ||
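
After this series every profile path (kprobe, kretprobe, syscall enter/exit) follows the same two-phase pattern: ftrace_perf_buf_prepare() reserves and stamps a record, the caller fills in the event-specific fields, and ftrace_perf_buf_submit() hands it to perf and releases the recursion context and IRQ state taken by the prepare step. A userspace caricature of that calling convention, where buf_prepare()/buf_submit() are stand-ins rather than the kernel API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct record {
    int type;       /* filled by prepare */
    long nr;        /* filled by the caller */
    long args[4];   /* filled by the caller */
};

/* Stand-in for ftrace_perf_buf_prepare(): allocate and stamp the header. */
static struct record *buf_prepare(int type)
{
    struct record *rec = calloc(1, sizeof(*rec));

    if (rec)
        rec->type = type;
    return rec;
}

/* Stand-in for ftrace_perf_buf_submit(): consume and release the record. */
static void buf_submit(struct record *rec)
{
    printf("submit type=%d nr=%ld args[0]=%ld\n",
           rec->type, rec->nr, rec->args[0]);
    free(rec);
}

/* What a handler like prof_syscall_enter() reduces to after the change. */
static void handler(long syscall_nr, const long *args)
{
    struct record *rec = buf_prepare(42);

    if (!rec)
        return;

    rec->nr = syscall_nr;
    memcpy(rec->args, args, sizeof(rec->args));
    buf_submit(rec);
}

int main(void)
{
    long args[4] = { 1, 2, 3, 4 };

    handler(64, args);
    return 0;
}
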