Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  |  377
1 file changed, 274 insertions, 103 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3d1552d3c12b..a4fa381db3c2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #include <linux/sysfs.h>
 #include <linux/dcache.h>
 #include <linux/percpu.h>
@@ -82,14 +83,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
 void __weak hw_perf_disable(void) { barrier(); }
 void __weak hw_perf_enable(void) { barrier(); }
 
-int __weak
-hw_perf_group_sched_in(struct perf_event *group_leader,
-               struct perf_cpu_context *cpuctx,
-               struct perf_event_context *ctx)
-{
-        return 0;
-}
-
 void __weak perf_event_print_debug(void) { }
 
 static DEFINE_PER_CPU(int, perf_disable_count);
@@ -262,6 +255,18 @@ static void update_event_times(struct perf_event *event)
         event->total_time_running = run_end - event->tstamp_running;
 }
 
+/*
+ * Update total_time_enabled and total_time_running for all events in a group.
+ */
+static void update_group_times(struct perf_event *leader)
+{
+        struct perf_event *event;
+
+        update_event_times(leader);
+        list_for_each_entry(event, &leader->sibling_list, group_entry)
+                update_event_times(event);
+}
+
 static struct list_head *
 ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 {
@@ -315,8 +320,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-        struct perf_event *sibling, *tmp;
-
         if (list_empty(&event->group_entry))
                 return;
         ctx->nr_events--;
@@ -329,7 +332,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
         if (event->group_leader != event)
                 event->group_leader->nr_siblings--;
 
-        update_event_times(event);
+        update_group_times(event);
 
         /*
          * If event was in error state, then keep it
@@ -340,6 +343,12 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
          */
         if (event->state > PERF_EVENT_STATE_OFF)
                 event->state = PERF_EVENT_STATE_OFF;
+}
+
+static void
+perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
+{
+        struct perf_event *sibling, *tmp;
 
         /*
          * If this was a group event with sibling events then
@@ -505,18 +514,6 @@ retry:
 }
 
 /*
- * Update total_time_enabled and total_time_running for all events in a group.
- */
-static void update_group_times(struct perf_event *leader)
-{
-        struct perf_event *event;
-
-        update_event_times(leader);
-        list_for_each_entry(event, &leader->sibling_list, group_entry)
-                update_event_times(event);
-}
-
-/*
  * Cross CPU call to disable a performance event
  */
 static void __perf_event_disable(void *info)
@@ -640,15 +637,20 @@ group_sched_in(struct perf_event *group_event,
                struct perf_cpu_context *cpuctx,
                struct perf_event_context *ctx)
 {
-        struct perf_event *event, *partial_group;
+        struct perf_event *event, *partial_group = NULL;
+        const struct pmu *pmu = group_event->pmu;
+        bool txn = false;
         int ret;
 
         if (group_event->state == PERF_EVENT_STATE_OFF)
                 return 0;
 
-        ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
-        if (ret)
-                return ret < 0 ? ret : 0;
+        /* Check if group transaction availabe */
+        if (pmu->start_txn)
+                txn = true;
+
+        if (txn)
+                pmu->start_txn(pmu);
 
         if (event_sched_in(group_event, cpuctx, ctx))
                 return -EAGAIN;
@@ -663,9 +665,19 @@ group_sched_in(struct perf_event *group_event,
                 }
         }
 
-        return 0;
+        if (!txn)
+                return 0;
+
+        ret = pmu->commit_txn(pmu);
+        if (!ret) {
+                pmu->cancel_txn(pmu);
+                return 0;
+        }
 
 group_error:
+        if (txn)
+                pmu->cancel_txn(pmu);
+
         /*
          * Groups can be scheduled in as one unit only, so undo any
          * partial group before returning:
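The two hunks above replace the weak hw_perf_group_sched_in() hook with optional per-PMU start_txn()/commit_txn()/cancel_txn() callbacks, so an event group is scheduled all-or-nothing: queue every member, then ask the PMU once whether the whole group fits. The sketch below is a minimal user-space model of that transaction pattern, not part of the patch and not the kernel API; the fake_pmu type, its capacity check and the counters are illustrative assumptions.

/*
 * Sketch: all-or-nothing group scheduling with start/commit/cancel.
 * Hypothetical user-space model; builds with any C99 compiler.
 */
#include <stdio.h>

struct fake_pmu {
        int queued;             /* events added since start_txn()   */
        int committed;          /* events actually scheduled        */
};

static void start_txn(struct fake_pmu *pmu)  { pmu->queued = 0; }
static void cancel_txn(struct fake_pmu *pmu) { pmu->queued = 0; }

static int commit_txn(struct fake_pmu *pmu, int capacity)
{
        /* Validate the whole group once, at commit time. */
        if (pmu->queued > capacity)
                return -1;
        pmu->committed += pmu->queued;
        pmu->queued = 0;
        return 0;
}

static int group_sched_in(struct fake_pmu *pmu, int group_size, int capacity)
{
        start_txn(pmu);
        for (int i = 0; i < group_size; i++)
                pmu->queued++;          /* stands in for event_sched_in() */

        if (commit_txn(pmu, capacity) != 0) {
                cancel_txn(pmu);        /* roll back the partial group */
                return -1;
        }
        return 0;
}

int main(void)
{
        struct fake_pmu pmu = { 0, 0 };

        printf("group of 2 on 4 counters: %d\n", group_sched_in(&pmu, 2, 4));
        printf("group of 6 on 4 counters: %d\n", group_sched_in(&pmu, 6, 4));
        printf("committed events: %d\n", pmu.committed);
        return 0;
}

The point of the transaction is that the hardware PMU can defer its schedulability check to commit_txn() and judge the group as a whole, instead of revalidating after every event it accepts.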
@@ -1367,6 +1379,8 @@ void perf_event_task_sched_in(struct task_struct *task)
         if (cpuctx->task_ctx == ctx)
                 return;
 
+        perf_disable();
+
         /*
          * We want to keep the following priority order:
          * cpu pinned (that don't need to move), task pinned,
@@ -1379,6 +1393,8 @@ void perf_event_task_sched_in(struct task_struct *task)
         ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
 
         cpuctx->task_ctx = ctx;
+
+        perf_enable();
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -1856,9 +1872,30 @@ int perf_event_release_kernel(struct perf_event *event)
 {
         struct perf_event_context *ctx = event->ctx;
 
+        /*
+         * Remove from the PMU, can't get re-enabled since we got
+         * here because the last ref went.
+         */
+        perf_event_disable(event);
+
         WARN_ON_ONCE(ctx->parent_ctx);
-        mutex_lock(&ctx->mutex);
-        perf_event_remove_from_context(event);
+        /*
+         * There are two ways this annotation is useful:
+         *
+         *  1) there is a lock recursion from perf_event_exit_task
+         *     see the comment there.
+         *
+         *  2) there is a lock-inversion with mmap_sem through
+         *     perf_event_read_group(), which takes faults while
+         *     holding ctx->mutex, however this is called after
+         *     the last filedesc died, so there is no possibility
+         *     to trigger the AB-BA case.
+         */
+        mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+        raw_spin_lock_irq(&ctx->lock);
+        list_del_event(event, ctx);
+        perf_destroy_group(event, ctx);
+        raw_spin_unlock_irq(&ctx->lock);
         mutex_unlock(&ctx->mutex);
 
         mutex_lock(&event->owner->perf_event_mutex);
@@ -2642,6 +2679,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
 }
 
 static const struct file_operations perf_fops = {
+        .llseek = no_llseek,
         .release = perf_release,
         .read = perf_read,
         .poll = perf_poll,
@@ -2792,6 +2830,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 
 
 /*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+struct perf_guest_info_callbacks *perf_guest_cbs;
+
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+        perf_guest_cbs = cbs;
+        return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+        perf_guest_cbs = NULL;
+        return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
+/*
  * Output
  */
 static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
@@ -3743,7 +3802,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
                 .event_id = {
                         .header = {
                                 .type = PERF_RECORD_MMAP,
-                                .misc = 0,
+                                .misc = PERF_RECORD_MISC_USER,
                                 /* .size */
                         },
                         /* .pid */
@@ -3961,36 +4020,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
         perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
-static int perf_swevent_is_counting(struct perf_event *event)
-{
-        /*
-         * The event is active, we're good!
-         */
-        if (event->state == PERF_EVENT_STATE_ACTIVE)
-                return 1;
-
-        /*
-         * The event is off/error, not counting.
-         */
-        if (event->state != PERF_EVENT_STATE_INACTIVE)
-                return 0;
-
-        /*
-         * The event is inactive, if the context is active
-         * we're part of a group that didn't make it on the 'pmu',
-         * not counting.
-         */
-        if (event->ctx->is_active)
-                return 0;
-
-        /*
-         * We're inactive and the context is too, this means the
-         * task is scheduled out, we're counting events that happen
-         * to us, like migration events.
-         */
-        return 1;
-}
-
 static int perf_tp_event_match(struct perf_event *event,
                                struct perf_sample_data *data);
 
@@ -4014,12 +4043,6 @@ static int perf_swevent_match(struct perf_event *event,
                               struct perf_sample_data *data,
                               struct pt_regs *regs)
 {
-        if (event->cpu != -1 && event->cpu != smp_processor_id())
-                return 0;
-
-        if (!perf_swevent_is_counting(event))
-                return 0;
-
         if (event->attr.type != type)
                 return 0;
 
@@ -4036,18 +4059,53 @@ static int perf_swevent_match(struct perf_event *event,
         return 1;
 }
 
-static void perf_swevent_ctx_event(struct perf_event_context *ctx,
-                                   enum perf_type_id type,
-                                   u32 event_id, u64 nr, int nmi,
-                                   struct perf_sample_data *data,
-                                   struct pt_regs *regs)
+static inline u64 swevent_hash(u64 type, u32 event_id)
 {
+        u64 val = event_id | (type << 32);
+
+        return hash_64(val, SWEVENT_HLIST_BITS);
+}
+
+static struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+{
+        u64 hash;
+        struct swevent_hlist *hlist;
+
+        hash = swevent_hash(type, event_id);
+
+        hlist = rcu_dereference(ctx->swevent_hlist);
+        if (!hlist)
+                return NULL;
+
+        return &hlist->heads[hash];
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+                             u64 nr, int nmi,
+                             struct perf_sample_data *data,
+                             struct pt_regs *regs)
+{
+        struct perf_cpu_context *cpuctx;
         struct perf_event *event;
+        struct hlist_node *node;
+        struct hlist_head *head;
 
-        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+        cpuctx = &__get_cpu_var(perf_cpu_context);
+
+        rcu_read_lock();
+
+        head = find_swevent_head(cpuctx, type, event_id);
+
+        if (!head)
+                goto end;
+
+        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                 if (perf_swevent_match(event, type, event_id, data, regs))
                         perf_swevent_add(event, nr, nmi, data, regs);
         }
+end:
+        rcu_read_unlock();
 }
 
 int perf_swevent_get_recursion_context(void)
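In the hunk above, each software event is keyed by the 64-bit value event_id | (type << 32) and hashed into one of 2^SWEVENT_HLIST_BITS per-CPU list heads, so do_perf_sw_event() only walks the bucket that can contain a match instead of every event on the context. Below is a standalone sketch of that bucketing; it is not the kernel code: the multiplier is an arbitrary odd constant standing in for hash_64(), and SWEVENT_HLIST_BITS is assumed to be 8 because its definition lives outside this file.

/*
 * Sketch: bucket selection for software events, keyed by (type, event_id).
 */
#include <stdint.h>
#include <stdio.h>

#define SWEVENT_HLIST_BITS 8            /* assumed: 256 hash buckets */

static uint64_t hash_64_sketch(uint64_t val, unsigned int bits)
{
        /* Multiplicative hash; constant chosen arbitrarily, not the kernel's. */
        return (val * 0x9e3779b97f4a7c15ULL) >> (64 - bits);
}

static uint64_t swevent_hash(uint64_t type, uint32_t event_id)
{
        uint64_t val = event_id | (type << 32);

        return hash_64_sketch(val, SWEVENT_HLIST_BITS);
}

int main(void)
{
        /* The same (type, id) pair always selects the same bucket ... */
        printf("bucket(1, 3) = %llu\n", (unsigned long long)swevent_hash(1, 3));
        /* ... while different ids spread over the table instead of one long list. */
        printf("bucket(1, 4) = %llu\n", (unsigned long long)swevent_hash(1, 4));
        return 0;
}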
@@ -4085,27 +4143,6 @@ void perf_swevent_put_recursion_context(int rctx)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-                             u64 nr, int nmi,
-                             struct perf_sample_data *data,
-                             struct pt_regs *regs)
-{
-        struct perf_cpu_context *cpuctx;
-        struct perf_event_context *ctx;
-
-        cpuctx = &__get_cpu_var(perf_cpu_context);
-        rcu_read_lock();
-        perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
-                                nr, nmi, data, regs);
-        /*
-         * doesn't really matter which of the child contexts the
-         * events ends up in.
-         */
-        ctx = rcu_dereference(current->perf_event_ctxp);
-        if (ctx)
-                perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
-        rcu_read_unlock();
-}
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
                      struct pt_regs *regs, u64 addr)
@@ -4131,16 +4168,28 @@ static void perf_swevent_read(struct perf_event *event)
 static int perf_swevent_enable(struct perf_event *event)
 {
         struct hw_perf_event *hwc = &event->hw;
+        struct perf_cpu_context *cpuctx;
+        struct hlist_head *head;
+
+        cpuctx = &__get_cpu_var(perf_cpu_context);
 
         if (hwc->sample_period) {
                 hwc->last_period = hwc->sample_period;
                 perf_swevent_set_period(event);
         }
+
+        head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
+        if (WARN_ON_ONCE(!head))
+                return -EINVAL;
+
+        hlist_add_head_rcu(&event->hlist_entry, head);
+
         return 0;
 }
 
 static void perf_swevent_disable(struct perf_event *event)
 {
+        hlist_del_rcu(&event->hlist_entry);
 }
 
 static const struct pmu perf_ops_generic = {
@@ -4168,15 +4217,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
         perf_sample_data_init(&data, 0);
         data.period = event->hw.last_period;
         regs = get_irq_regs();
-        /*
-         * In case we exclude kernel IPs or are somehow not in interrupt
-         * context, provide the next best thing, the user IP.
-         */
-        if ((event->attr.exclude_kernel || !regs) &&
-                        !event->attr.exclude_user)
-                regs = task_pt_regs(current);
 
-        if (regs) {
+        if (regs && !perf_exclude_event(event, regs)) {
                 if (!(event->attr.exclude_idle && current->pid == 0))
                         if (perf_event_overflow(event, 0, &data, regs))
                                 ret = HRTIMER_NORESTART;
@@ -4324,6 +4366,105 @@ static const struct pmu perf_ops_task_clock = {
         .read = task_clock_perf_event_read,
 };
 
+static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
+{
+        struct swevent_hlist *hlist;
+
+        hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
+        kfree(hlist);
+}
+
+static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
+{
+        struct swevent_hlist *hlist;
+
+        if (!cpuctx->swevent_hlist)
+                return;
+
+        hlist = cpuctx->swevent_hlist;
+        rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
+        call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+}
+
+static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+{
+        struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+
+        mutex_lock(&cpuctx->hlist_mutex);
+
+        if (!--cpuctx->hlist_refcount)
+                swevent_hlist_release(cpuctx);
+
+        mutex_unlock(&cpuctx->hlist_mutex);
+}
+
+static void swevent_hlist_put(struct perf_event *event)
+{
+        int cpu;
+
+        if (event->cpu != -1) {
+                swevent_hlist_put_cpu(event, event->cpu);
+                return;
+        }
+
+        for_each_possible_cpu(cpu)
+                swevent_hlist_put_cpu(event, cpu);
+}
+
+static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+{
+        struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+        int err = 0;
+
+        mutex_lock(&cpuctx->hlist_mutex);
+
+        if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
+                struct swevent_hlist *hlist;
+
+                hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+                if (!hlist) {
+                        err = -ENOMEM;
+                        goto exit;
+                }
+                rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+        }
+        cpuctx->hlist_refcount++;
+exit:
+        mutex_unlock(&cpuctx->hlist_mutex);
+
+        return err;
+}
+
+static int swevent_hlist_get(struct perf_event *event)
+{
+        int err;
+        int cpu, failed_cpu;
+
+        if (event->cpu != -1)
+                return swevent_hlist_get_cpu(event, event->cpu);
+
+        get_online_cpus();
+        for_each_possible_cpu(cpu) {
+                err = swevent_hlist_get_cpu(event, cpu);
+                if (err) {
+                        failed_cpu = cpu;
+                        goto fail;
+                }
+        }
+        put_online_cpus();
+
+        return 0;
+fail:
+        for_each_possible_cpu(cpu) {
+                if (cpu == failed_cpu)
+                        break;
+                swevent_hlist_put_cpu(event, cpu);
+        }
+
+        put_online_cpus();
+        return err;
+}
+
 #ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
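The swevent_hlist_get()/swevent_hlist_put() helpers added above refcount the per-CPU hash table: the first interested event allocates it, later events just take a reference, and the last put hands the table to call_rcu() so lockless readers can finish before it is freed. Here is a deliberately simplified model of that get/put scheme, collapsed to one global table with plain malloc()/free() in place of per-CPU data and RCU; the names are made up for illustration.

/*
 * Sketch: refcounted lazy allocation, loosely modelled on swevent_hlist_get/put.
 */
#include <stdio.h>
#include <stdlib.h>

struct bucket_table {
        void *heads[256];
};

static struct bucket_table *table;      /* lazily allocated            */
static int table_refcount;              /* number of current users     */

static int table_get(void)
{
        if (!table) {
                table = calloc(1, sizeof(*table));
                if (!table)
                        return -1;      /* -ENOMEM in the kernel code  */
        }
        table_refcount++;
        return 0;
}

static void table_put(void)
{
        if (--table_refcount == 0) {
                free(table);            /* the kernel defers this via call_rcu() */
                table = NULL;
        }
}

int main(void)
{
        table_get();                    /* first user: allocates       */
        table_get();                    /* second user: takes a ref    */
        table_put();
        table_put();                    /* last ref gone: table freed  */
        printf("table %s\n", table ? "still allocated" : "released");
        return 0;
}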
@@ -4357,10 +4498,13 @@ static int perf_tp_event_match(struct perf_event *event,
 static void tp_perf_event_destroy(struct perf_event *event)
 {
         perf_trace_disable(event->attr.config);
+        swevent_hlist_put(event);
 }
 
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
+        int err;
+
         /*
          * Raw tracepoint data is a severe data leak, only allow root to
          * have these.
@@ -4374,6 +4518,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
                 return NULL;
 
         event->destroy = tp_perf_event_destroy;
+        err = swevent_hlist_get(event);
+        if (err) {
+                perf_trace_disable(event->attr.config);
+                return ERR_PTR(err);
+        }
 
         return &perf_ops_generic;
 }
@@ -4474,6 +4623,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
         WARN_ON(event->parent);
 
         atomic_dec(&perf_swevent_enabled[event_id]);
+        swevent_hlist_put(event);
 }
 
 static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4512,6 +4662,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
         case PERF_COUNT_SW_ALIGNMENT_FAULTS:
         case PERF_COUNT_SW_EMULATION_FAULTS:
                 if (!event->parent) {
+                        int err;
+
+                        err = swevent_hlist_get(event);
+                        if (err)
+                                return ERR_PTR(err);
+
                         atomic_inc(&perf_swevent_enabled[event_id]);
                         event->destroy = sw_perf_event_destroy;
                 }
@@ -5176,7 +5332,7 @@ void perf_event_exit_task(struct task_struct *child)
          *
          * But since its the parent context it won't be the same instance.
          */
-        mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
+        mutex_lock(&child_ctx->mutex);
 
 again:
         list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
@@ -5384,6 +5540,7 @@ static void __init perf_event_init_all_cpus(void)
 
         for_each_possible_cpu(cpu) {
                 cpuctx = &per_cpu(perf_cpu_context, cpu);
+                mutex_init(&cpuctx->hlist_mutex);
                 __perf_event_init_context(&cpuctx->ctx, NULL);
         }
 }
@@ -5397,6 +5554,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
         spin_lock(&perf_resource_lock);
         cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
         spin_unlock(&perf_resource_lock);
+
+        mutex_lock(&cpuctx->hlist_mutex);
+        if (cpuctx->hlist_refcount > 0) {
+                struct swevent_hlist *hlist;
+
+                hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+                WARN_ON_ONCE(!hlist);
+                rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+        }
+        mutex_unlock(&cpuctx->hlist_mutex);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5416,6 +5583,10 @@ static void perf_event_exit_cpu(int cpu)
         struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
         struct perf_event_context *ctx = &cpuctx->ctx;
 
+        mutex_lock(&cpuctx->hlist_mutex);
+        swevent_hlist_release(cpuctx);
+        mutex_unlock(&cpuctx->hlist_mutex);
+
         mutex_lock(&ctx->mutex);
         smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
         mutex_unlock(&ctx->mutex);
