Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--	kernel/perf_event.c	379
1 files changed, 275 insertions, 104 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2f3fbf84215a..a4fa381db3c2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #include <linux/sysfs.h>
 #include <linux/dcache.h>
 #include <linux/percpu.h>
@@ -82,14 +83,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
 void __weak hw_perf_disable(void)		{ barrier(); }
 void __weak hw_perf_enable(void)		{ barrier(); }
 
-int __weak
-hw_perf_group_sched_in(struct perf_event *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	return 0;
-}
-
 void __weak perf_event_print_debug(void)	{ }
 
 static DEFINE_PER_CPU(int, perf_disable_count);
@@ -262,6 +255,18 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+/*
+ * Update total_time_enabled and total_time_running for all events in a group.
+ */
+static void update_group_times(struct perf_event *leader)
+{
+	struct perf_event *event;
+
+	update_event_times(leader);
+	list_for_each_entry(event, &leader->sibling_list, group_entry)
+		update_event_times(event);
+}
+
 static struct list_head *
 ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 {
@@ -315,8 +320,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_event *sibling, *tmp;
-
 	if (list_empty(&event->group_entry))
 		return;
 	ctx->nr_events--;
@@ -329,7 +332,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (event->group_leader != event)
 		event->group_leader->nr_siblings--;
 
-	update_event_times(event);
+	update_group_times(event);
 
 	/*
 	 * If event was in error state, then keep it
@@ -340,6 +343,12 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 */
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
+}
+
+static void
+perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
+{
+	struct perf_event *sibling, *tmp;
 
 	/*
 	 * If this was a group event with sibling events then
@@ -505,18 +514,6 @@ retry:
 }
 
 /*
- * Update total_time_enabled and total_time_running for all events in a group.
- */
-static void update_group_times(struct perf_event *leader)
-{
-	struct perf_event *event;
-
-	update_event_times(leader);
-	list_for_each_entry(event, &leader->sibling_list, group_entry)
-		update_event_times(event);
-}
-
-/*
  * Cross CPU call to disable a performance event
  */
 static void __perf_event_disable(void *info)
@@ -640,15 +637,20 @@ group_sched_in(struct perf_event *group_event,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_event_context *ctx)
 {
-	struct perf_event *event, *partial_group;
+	struct perf_event *event, *partial_group = NULL;
+	const struct pmu *pmu = group_event->pmu;
+	bool txn = false;
 	int ret;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
-	if (ret)
-		return ret < 0 ? ret : 0;
+	/* Check if group transaction availabe */
+	if (pmu->start_txn)
+		txn = true;
+
+	if (txn)
+		pmu->start_txn(pmu);
 
 	if (event_sched_in(group_event, cpuctx, ctx))
 		return -EAGAIN;
@@ -663,9 +665,19 @@ group_sched_in(struct perf_event *group_event,
 		}
 	}
 
-	return 0;
+	if (!txn)
+		return 0;
+
+	ret = pmu->commit_txn(pmu);
+	if (!ret) {
+		pmu->cancel_txn(pmu);
+		return 0;
+	}
 
 group_error:
+	if (txn)
+		pmu->cancel_txn(pmu);
+
 	/*
 	 * Groups can be scheduled in as one unit only, so undo any
 	 * partial group before returning:
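
The two hunks above replace the weak hw_perf_group_sched_in() hook with a three-step transaction: start_txn() marks the beginning of a group, each event goes through the usual ->enable() path, and commit_txn() asks the hardware driver whether the whole group actually fits on the counters, with cancel_txn() unwinding when it does not. A minimal sketch of what a PMU backend might supply is shown below; only the start_txn/commit_txn/cancel_txn hooks come from this patch, while the demo_* names and the scheduling helper are hypothetical.

static bool demo_txn_in_progress;	/* hypothetical per-PMU state */

static void demo_pmu_start_txn(const struct pmu *pmu)
{
	/* Defer the hardware constraint check until commit_txn(). */
	demo_txn_in_progress = true;
}

static void demo_pmu_cancel_txn(const struct pmu *pmu)
{
	/* The group did not fit (or was aborted); drop the pending state. */
	demo_txn_in_progress = false;
}

static int demo_pmu_commit_txn(const struct pmu *pmu)
{
	/*
	 * Validate every event queued since start_txn() in one pass;
	 * demo_pmu_schedule_events() stands in for a driver's own
	 * counter-scheduling logic and returns 0 on success.
	 */
	demo_txn_in_progress = false;
	return demo_pmu_schedule_events();
}

Note that group_sched_in() only uses the transaction when ->start_txn is non-NULL, so PMUs that do not provide the hooks keep the old per-event behaviour.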
@@ -1367,6 +1379,8 @@ void perf_event_task_sched_in(struct task_struct *task)
 	if (cpuctx->task_ctx == ctx)
 		return;
 
+	perf_disable();
+
 	/*
 	 * We want to keep the following priority order:
 	 * cpu pinned (that don't need to move), task pinned,
@@ -1379,6 +1393,8 @@ void perf_event_task_sched_in(struct task_struct *task)
 	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
 
 	cpuctx->task_ctx = ctx;
+
+	perf_enable();
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -1856,9 +1872,30 @@ int perf_event_release_kernel(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 
+	/*
+	 * Remove from the PMU, can't get re-enabled since we got
+	 * here because the last ref went.
+	 */
+	perf_event_disable(event);
+
 	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
-	perf_event_remove_from_context(event);
+	/*
+	 * There are two ways this annotation is useful:
+	 *
+	 *  1) there is a lock recursion from perf_event_exit_task
+	 *     see the comment there.
+	 *
+	 *  2) there is a lock-inversion with mmap_sem through
+	 *     perf_event_read_group(), which takes faults while
+	 *     holding ctx->mutex, however this is called after
+	 *     the last filedesc died, so there is no possibility
+	 *     to trigger the AB-BA case.
+	 */
+	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+	raw_spin_lock_irq(&ctx->lock);
+	list_del_event(event, ctx);
+	perf_destroy_group(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 	mutex_unlock(&ctx->mutex);
 
 	mutex_lock(&event->owner->perf_event_mutex);
@@ -2642,6 +2679,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
 }
 
 static const struct file_operations perf_fops = {
+	.llseek			= no_llseek,
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
@@ -2792,6 +2830,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 
 
 /*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+struct perf_guest_info_callbacks *perf_guest_cbs;
+
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	perf_guest_cbs = cbs;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	perf_guest_cbs = NULL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
+/*
  * Output
  */
 static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
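
The two exported helpers above give a hypervisor a single slot for its guest-info callbacks (the comment notes that only KVM is expected to use it for now). A hedged usage sketch follows; the demo_* callback implementations are hypothetical placeholders, and the field names are illustrative of how the callback structure is consumed elsewhere in the series.

/*
 * Illustrative only: the demo_* callbacks are placeholders; the register
 * and unregister calls are what this patch provides.
 */
static struct perf_guest_info_callbacks demo_guest_cbs = {
	.is_in_guest	= demo_is_in_guest,		/* nonzero while a guest is running */
	.is_user_mode	= demo_is_guest_user_mode,	/* guest CPL check for sample misc bits */
	.get_guest_ip	= demo_get_guest_ip,		/* guest instruction pointer for samples */
};

static int __init demo_virt_init(void)
{
	return perf_register_guest_info_callbacks(&demo_guest_cbs);
}

static void __exit demo_virt_exit(void)
{
	perf_unregister_guest_info_callbacks(&demo_guest_cbs);
}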
@@ -3743,7 +3802,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_MMAP,
-				.misc = 0,
+				.misc = PERF_RECORD_MISC_USER,
 				/* .size */
 			},
 			/* .pid */
@@ -3961,36 +4020,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 		perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
-static int perf_swevent_is_counting(struct perf_event *event)
-{
-	/*
-	 * The event is active, we're good!
-	 */
-	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		return 1;
-
-	/*
-	 * The event is off/error, not counting.
-	 */
-	if (event->state != PERF_EVENT_STATE_INACTIVE)
-		return 0;
-
-	/*
-	 * The event is inactive, if the context is active
-	 * we're part of a group that didn't make it on the 'pmu',
-	 * not counting.
-	 */
-	if (event->ctx->is_active)
-		return 0;
-
-	/*
-	 * We're inactive and the context is too, this means the
-	 * task is scheduled out, we're counting events that happen
-	 * to us, like migration events.
-	 */
-	return 1;
-}
-
 static int perf_tp_event_match(struct perf_event *event,
 				struct perf_sample_data *data);
 
@@ -4014,12 +4043,6 @@ static int perf_swevent_match(struct perf_event *event,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
-		return 0;
-
-	if (!perf_swevent_is_counting(event))
-		return 0;
-
 	if (event->attr.type != type)
 		return 0;
 
@@ -4036,18 +4059,53 @@ static int perf_swevent_match(struct perf_event *event,
 	return 1;
 }
 
-static void perf_swevent_ctx_event(struct perf_event_context *ctx,
-				    enum perf_type_id type,
-				    u32 event_id, u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
+static inline u64 swevent_hash(u64 type, u32 event_id)
+{
+	u64 val = event_id | (type << 32);
+
+	return hash_64(val, SWEVENT_HLIST_BITS);
+}
+
+static struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+{
+	u64 hash;
+	struct swevent_hlist *hlist;
+
+	hash = swevent_hash(type, event_id);
+
+	hlist = rcu_dereference(ctx->swevent_hlist);
+	if (!hlist)
+		return NULL;
+
+	return &hlist->heads[hash];
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+			    u64 nr, int nmi,
+			    struct perf_sample_data *data,
+			    struct pt_regs *regs)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event *event;
+	struct hlist_node *node;
+	struct hlist_head *head;
 
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+	cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+
+	head = find_swevent_head(cpuctx, type, event_id);
+
+	if (!head)
+		goto end;
+
+	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
 		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_add(event, nr, nmi, data, regs);
 	}
+end:
+	rcu_read_unlock();
 }
 
 int perf_swevent_get_recursion_context(void)
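
The hashing scheme above is plain arithmetic: the 32-bit event id and the event type are packed into one 64-bit key (event_id | (type << 32)), and hash_64() folds that key down to SWEVENT_HLIST_BITS bits to pick a bucket in the per-CPU swevent hash table. A stand-alone illustration of the bucket arithmetic in user-space C follows; the multiplier only mimics hash_64(), and 8 bits is an assumed stand-in for SWEVENT_HLIST_BITS.

#include <stdint.h>
#include <stdio.h>

#define DEMO_HLIST_BITS 8	/* assumed stand-in for SWEVENT_HLIST_BITS */

/*
 * Multiplicative hash in the spirit of the kernel's hash_64(): multiply by a
 * large odd constant and keep the top 'bits' bits.
 */
static uint64_t demo_hash_64(uint64_t val, unsigned int bits)
{
	return (val * 0x9e3779b97f4a7c15ULL) >> (64 - bits);
}

int main(void)
{
	uint64_t type = 1;	/* PERF_TYPE_SOFTWARE */
	uint32_t event_id = 2;	/* PERF_COUNT_SW_PAGE_FAULTS */
	uint64_t val = event_id | (type << 32);

	printf("bucket = %llu of %u\n",
	       (unsigned long long)demo_hash_64(val, DEMO_HLIST_BITS),
	       1u << DEMO_HLIST_BITS);
	return 0;
}

Compared with the removed walk over every event in the context, a software event now only visits the events that hashed into its own bucket of the per-CPU table.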
@@ -4085,27 +4143,6 @@ void perf_swevent_put_recursion_context(int rctx)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-			    u64 nr, int nmi,
-			    struct perf_sample_data *data,
-			    struct pt_regs *regs)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-
-	cpuctx = &__get_cpu_var(perf_cpu_context);
-	rcu_read_lock();
-	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
-				 nr, nmi, data, regs);
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_event_ctxp);
-	if (ctx)
-		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
-	rcu_read_unlock();
-}
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)
@@ -4131,16 +4168,28 @@ static void perf_swevent_read(struct perf_event *event)
 static int perf_swevent_enable(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	struct perf_cpu_context *cpuctx;
+	struct hlist_head *head;
+
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 
 	if (hwc->sample_period) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
+
+	head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
+	if (WARN_ON_ONCE(!head))
+		return -EINVAL;
+
+	hlist_add_head_rcu(&event->hlist_entry, head);
+
 	return 0;
 }
 
 static void perf_swevent_disable(struct perf_event *event)
 {
+	hlist_del_rcu(&event->hlist_entry);
 }
 
 static const struct pmu perf_ops_generic = {
@@ -4168,15 +4217,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 	perf_sample_data_init(&data, 0);
 	data.period = event->hw.last_period;
 	regs = get_irq_regs();
-	/*
-	 * In case we exclude kernel IPs or are somehow not in interrupt
-	 * context, provide the next best thing, the user IP.
-	 */
-	if ((event->attr.exclude_kernel || !regs) &&
-			!event->attr.exclude_user)
-		regs = task_pt_regs(current);
 
-	if (regs) {
+	if (regs && !perf_exclude_event(event, regs)) {
 		if (!(event->attr.exclude_idle && current->pid == 0))
 			if (perf_event_overflow(event, 0, &data, regs))
 				ret = HRTIMER_NORESTART;
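
perf_exclude_event() is not shown in this hunk; elsewhere in the same series it replaces the open-coded exclude_kernel/exclude_user handling deleted above. A rough sketch of the check it performs is given below as an approximation, not the verbatim helper.

/*
 * Approximation of the filtering done by perf_exclude_event(); the real
 * helper lives elsewhere in kernel/perf_event.c and may differ in detail.
 */
static int demo_exclude_event(struct perf_event *event, struct pt_regs *regs)
{
	if (event->attr.exclude_user && user_mode(regs))
		return 1;	/* sample came from user mode, but user is excluded */

	if (event->attr.exclude_kernel && !user_mode(regs))
		return 1;	/* sample came from kernel mode, but kernel is excluded */

	return 0;		/* keep the sample */
}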
@@ -4324,6 +4366,105 @@ static const struct pmu perf_ops_task_clock = {
 	.read		= task_clock_perf_event_read,
 };
 
+static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
+{
+	struct swevent_hlist *hlist;
+
+	hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
+	kfree(hlist);
+}
+
+static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
+{
+	struct swevent_hlist *hlist;
+
+	if (!cpuctx->swevent_hlist)
+		return;
+
+	hlist = cpuctx->swevent_hlist;
+	rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
+	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+}
+
+static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+
+	mutex_lock(&cpuctx->hlist_mutex);
+
+	if (!--cpuctx->hlist_refcount)
+		swevent_hlist_release(cpuctx);
+
+	mutex_unlock(&cpuctx->hlist_mutex);
+}
+
+static void swevent_hlist_put(struct perf_event *event)
+{
+	int cpu;
+
+	if (event->cpu != -1) {
+		swevent_hlist_put_cpu(event, event->cpu);
+		return;
+	}
+
+	for_each_possible_cpu(cpu)
+		swevent_hlist_put_cpu(event, cpu);
+}
+
+static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	int err = 0;
+
+	mutex_lock(&cpuctx->hlist_mutex);
+
+	if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
+		struct swevent_hlist *hlist;
+
+		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+		if (!hlist) {
+			err = -ENOMEM;
+			goto exit;
+		}
+		rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+	}
+	cpuctx->hlist_refcount++;
+exit:
+	mutex_unlock(&cpuctx->hlist_mutex);
+
+	return err;
+}
+
+static int swevent_hlist_get(struct perf_event *event)
+{
+	int err;
+	int cpu, failed_cpu;
+
+	if (event->cpu != -1)
+		return swevent_hlist_get_cpu(event, event->cpu);
+
+	get_online_cpus();
+	for_each_possible_cpu(cpu) {
+		err = swevent_hlist_get_cpu(event, cpu);
+		if (err) {
+			failed_cpu = cpu;
+			goto fail;
+		}
+	}
+	put_online_cpus();
+
+	return 0;
+fail:
+	for_each_possible_cpu(cpu) {
+		if (cpu == failed_cpu)
+			break;
+		swevent_hlist_put_cpu(event, cpu);
+	}
+
+	put_online_cpus();
+	return err;
+}
+
 #ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
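
The swevent_hlist_get()/swevent_hlist_put() pair above refcounts the per-CPU hash tables so they exist exactly as long as some software or tracepoint event needs them; the hunks that follow wire this into the event init and destroy paths. Condensed, the pattern looks like the sketch below, where the demo_* names are hypothetical stand-ins for the real callbacks.

/* Condensed view of the get-at-init / put-at-destroy pairing used below. */
static void demo_event_destroy(struct perf_event *event)
{
	/* ... type-specific teardown ... */
	swevent_hlist_put(event);		/* drop the hash table reference(s) */
}

static const struct pmu *demo_event_init(struct perf_event *event)
{
	int err = swevent_hlist_get(event);	/* allocate or reference the table(s) */

	if (err)
		return ERR_PTR(err);

	event->destroy = demo_event_destroy;
	return &perf_ops_generic;
}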
@@ -4357,10 +4498,13 @@ static int perf_tp_event_match(struct perf_event *event,
 static void tp_perf_event_destroy(struct perf_event *event)
 {
 	perf_trace_disable(event->attr.config);
+	swevent_hlist_put(event);
 }
 
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
+	int err;
+
 	/*
 	 * Raw tracepoint data is a severe data leak, only allow root to
 	 * have these.
@@ -4374,6 +4518,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 		return NULL;
 
 	event->destroy = tp_perf_event_destroy;
+	err = swevent_hlist_get(event);
+	if (err) {
+		perf_trace_disable(event->attr.config);
+		return ERR_PTR(err);
+	}
 
 	return &perf_ops_generic;
 }
@@ -4474,6 +4623,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 	WARN_ON(event->parent);
 
 	atomic_dec(&perf_swevent_enabled[event_id]);
+	swevent_hlist_put(event);
 }
 
 static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4512,6 +4662,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
 	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
 	case PERF_COUNT_SW_EMULATION_FAULTS:
 		if (!event->parent) {
+			int err;
+
+			err = swevent_hlist_get(event);
+			if (err)
+				return ERR_PTR(err);
+
 			atomic_inc(&perf_swevent_enabled[event_id]);
 			event->destroy = sw_perf_event_destroy;
 		}
@@ -4897,7 +5053,7 @@ err_fput_free_put_context:
 
 err_free_put_context:
 	if (err < 0)
-		kfree(event);
+		free_event(event);
 
 err_put_context:
 	if (err < 0)
@@ -5176,7 +5332,7 @@ void perf_event_exit_task(struct task_struct *child)
 	 *
 	 * But since its the parent context it won't be the same instance.
 	 */
-	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&child_ctx->mutex);
 
 again:
 	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
@@ -5384,6 +5540,7 @@ static void __init perf_event_init_all_cpus(void)
 
 	for_each_possible_cpu(cpu) {
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		mutex_init(&cpuctx->hlist_mutex);
 		__perf_event_init_context(&cpuctx->ctx, NULL);
 	}
 }
@@ -5397,6 +5554,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	spin_lock(&perf_resource_lock);
 	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
 	spin_unlock(&perf_resource_lock);
+
+	mutex_lock(&cpuctx->hlist_mutex);
+	if (cpuctx->hlist_refcount > 0) {
+		struct swevent_hlist *hlist;
+
+		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+		WARN_ON_ONCE(!hlist);
+		rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+	}
+	mutex_unlock(&cpuctx->hlist_mutex);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5416,6 +5583,10 @@ static void perf_event_exit_cpu(int cpu)
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
+	mutex_lock(&cpuctx->hlist_mutex);
+	swevent_hlist_release(cpuctx);
+	mutex_unlock(&cpuctx->hlist_mutex);
+
 	mutex_lock(&ctx->mutex);
 	smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
 	mutex_unlock(&ctx->mutex);