Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c | 283
1 file changed, 211 insertions(+), 72 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2f3fbf84215a..9dbe8cdaf145 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #include <linux/sysfs.h>
 #include <linux/dcache.h>
 #include <linux/percpu.h>
@@ -1367,6 +1368,8 @@ void perf_event_task_sched_in(struct task_struct *task)
 	if (cpuctx->task_ctx == ctx)
 		return;
 
+	perf_disable();
+
 	/*
 	 * We want to keep the following priority order:
 	 * cpu pinned (that don't need to move), task pinned,
@@ -1379,6 +1382,8 @@ void perf_event_task_sched_in(struct task_struct *task)
 	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
 
 	cpuctx->task_ctx = ctx;
+
+	perf_enable();
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -2642,6 +2647,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
 }
 
 static const struct file_operations perf_fops = {
+	.llseek			= no_llseek,
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
@@ -2792,6 +2798,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 
 
 /*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+struct perf_guest_info_callbacks *perf_guest_cbs;
+
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	perf_guest_cbs = cbs;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	perf_guest_cbs = NULL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
+/*
  * Output
  */
 static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
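The registration API added above is deliberately minimal: one global callback pointer, on the assumption that KVM is the only user for now. As a rough, hypothetical sketch (not part of this patch), a hypervisor module would wire itself up roughly as follows; the callback struct's members are declared in the header side of this series and are therefore left unfilled here:

#include <linux/module.h>
#include <linux/perf_event.h>

/* Hypothetical example module; only the register/unregister calls are taken from the patch. */
static struct perf_guest_info_callbacks example_guest_cbs = {
	/* .is_in_guest / .get_guest_ip style hooks would be filled in here */
};

static int __init example_guest_init(void)
{
	/* Tell perf where to ask about guest state; the call always returns 0 in this patch. */
	return perf_register_guest_info_callbacks(&example_guest_cbs);
}

static void __exit example_guest_exit(void)
{
	perf_unregister_guest_info_callbacks(&example_guest_cbs);
}

module_init(example_guest_init);
module_exit(example_guest_exit);
MODULE_LICENSE("GPL");

If a second hypervisor ever needs these hooks, the comment in the patch anticipates turning the single pointer into a list.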
@@ -3743,7 +3770,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_MMAP,
-				.misc = 0,
+				.misc = PERF_RECORD_MISC_USER,
 				/* .size */
 			},
 			/* .pid */
@@ -3961,36 +3988,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 		perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
-static int perf_swevent_is_counting(struct perf_event *event)
-{
-	/*
-	 * The event is active, we're good!
-	 */
-	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		return 1;
-
-	/*
-	 * The event is off/error, not counting.
-	 */
-	if (event->state != PERF_EVENT_STATE_INACTIVE)
-		return 0;
-
-	/*
-	 * The event is inactive, if the context is active
-	 * we're part of a group that didn't make it on the 'pmu',
-	 * not counting.
-	 */
-	if (event->ctx->is_active)
-		return 0;
-
-	/*
-	 * We're inactive and the context is too, this means the
-	 * task is scheduled out, we're counting events that happen
-	 * to us, like migration events.
-	 */
-	return 1;
-}
-
 static int perf_tp_event_match(struct perf_event *event,
 				struct perf_sample_data *data);
 
@@ -4014,12 +4011,6 @@ static int perf_swevent_match(struct perf_event *event,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
-		return 0;
-
-	if (!perf_swevent_is_counting(event))
-		return 0;
-
 	if (event->attr.type != type)
 		return 0;
 
@@ -4036,18 +4027,53 @@ static int perf_swevent_match(struct perf_event *event,
 	return 1;
 }
 
-static void perf_swevent_ctx_event(struct perf_event_context *ctx,
-				enum perf_type_id type,
-				u32 event_id, u64 nr, int nmi,
-				struct perf_sample_data *data,
-				struct pt_regs *regs)
+static inline u64 swevent_hash(u64 type, u32 event_id)
+{
+	u64 val = event_id | (type << 32);
+
+	return hash_64(val, SWEVENT_HLIST_BITS);
+}
+
+static struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
 {
+	u64 hash;
+	struct swevent_hlist *hlist;
+
+	hash = swevent_hash(type, event_id);
+
+	hlist = rcu_dereference(ctx->swevent_hlist);
+	if (!hlist)
+		return NULL;
+
+	return &hlist->heads[hash];
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				u64 nr, int nmi,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
+{
+	struct perf_cpu_context *cpuctx;
 	struct perf_event *event;
+	struct hlist_node *node;
+	struct hlist_head *head;
 
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+	cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+
+	head = find_swevent_head(cpuctx, type, event_id);
+
+	if (!head)
+		goto end;
+
+	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
 		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_add(event, nr, nmi, data, regs);
 	}
+end:
+	rcu_read_unlock();
 }
 
 int perf_swevent_get_recursion_context(void)
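The functions above replace the old walk of every event in the context with a per-CPU hash table keyed on (type, event_id): swevent_hash() folds the pair into one 64-bit key and hash_64() picks a bucket, so do_perf_sw_event() only scans events that could actually match. A stand-alone user-space illustration of the bucket selection, using an assumed table size and a multiplicative stand-in for the kernel's hash_64():

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_HLIST_BITS 8	/* assumed; stands in for SWEVENT_HLIST_BITS, defined in the header part of this series */

/* Multiplicative hash in the spirit of the kernel's hash_64(); the constant is illustrative. */
static uint64_t example_hash_64(uint64_t val, unsigned int bits)
{
	return (val * 0x61C8864680B583EBULL) >> (64 - bits);
}

/* Mirrors swevent_hash(): fold type and event_id into one key, hash it to a bucket index. */
static unsigned int example_swevent_bucket(uint64_t type, uint32_t event_id)
{
	uint64_t val = event_id | (type << 32);

	return (unsigned int)example_hash_64(val, EXAMPLE_HLIST_BITS);
}

int main(void)
{
	/* e.g. PERF_TYPE_SOFTWARE (1) / PERF_COUNT_SW_PAGE_FAULTS (2) */
	printf("bucket %u of %u\n", example_swevent_bucket(1, 2), 1u << EXAMPLE_HLIST_BITS);
	return 0;
}

Events enabled on a CPU end up in heads[bucket] of that CPU's swevent_hlist, which is what find_swevent_head() returns under RCU.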
@@ -4085,27 +4111,6 @@ void perf_swevent_put_recursion_context(int rctx)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-				u64 nr, int nmi,
-				struct perf_sample_data *data,
-				struct pt_regs *regs)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-
-	cpuctx = &__get_cpu_var(perf_cpu_context);
-	rcu_read_lock();
-	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
-				nr, nmi, data, regs);
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_event_ctxp);
-	if (ctx)
-		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
-	rcu_read_unlock();
-}
-
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
@@ -4131,16 +4136,28 @@ static void perf_swevent_read(struct perf_event *event)
 static int perf_swevent_enable(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	struct perf_cpu_context *cpuctx;
+	struct hlist_head *head;
+
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 
 	if (hwc->sample_period) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
+
+	head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
+	if (WARN_ON_ONCE(!head))
+		return -EINVAL;
+
+	hlist_add_head_rcu(&event->hlist_entry, head);
+
 	return 0;
 }
 
 static void perf_swevent_disable(struct perf_event *event)
 {
+	hlist_del_rcu(&event->hlist_entry);
 }
 
 static const struct pmu perf_ops_generic = {
@@ -4168,15 +4185,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 	perf_sample_data_init(&data, 0);
 	data.period = event->hw.last_period;
 	regs = get_irq_regs();
-	/*
-	 * In case we exclude kernel IPs or are somehow not in interrupt
-	 * context, provide the next best thing, the user IP.
-	 */
-	if ((event->attr.exclude_kernel || !regs) &&
-			!event->attr.exclude_user)
-		regs = task_pt_regs(current);
 
-	if (regs) {
+	if (regs && !perf_exclude_event(event, regs)) {
 		if (!(event->attr.exclude_idle && current->pid == 0))
 			if (perf_event_overflow(event, 0, &data, regs))
 				ret = HRTIMER_NORESTART;
@@ -4324,6 +4334,105 @@ static const struct pmu perf_ops_task_clock = {
 	.read		= task_clock_perf_event_read,
 };
 
+static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
+{
+	struct swevent_hlist *hlist;
+
+	hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
+	kfree(hlist);
+}
+
+static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
+{
+	struct swevent_hlist *hlist;
+
+	if (!cpuctx->swevent_hlist)
+		return;
+
+	hlist = cpuctx->swevent_hlist;
+	rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
+	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+}
+
+static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+
+	mutex_lock(&cpuctx->hlist_mutex);
+
+	if (!--cpuctx->hlist_refcount)
+		swevent_hlist_release(cpuctx);
+
+	mutex_unlock(&cpuctx->hlist_mutex);
+}
+
+static void swevent_hlist_put(struct perf_event *event)
+{
+	int cpu;
+
+	if (event->cpu != -1) {
+		swevent_hlist_put_cpu(event, event->cpu);
+		return;
+	}
+
+	for_each_possible_cpu(cpu)
+		swevent_hlist_put_cpu(event, cpu);
+}
+
+static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	int err = 0;
+
+	mutex_lock(&cpuctx->hlist_mutex);
+
+	if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
+		struct swevent_hlist *hlist;
+
+		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+		if (!hlist) {
+			err = -ENOMEM;
+			goto exit;
+		}
+		rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+	}
+	cpuctx->hlist_refcount++;
+exit:
+	mutex_unlock(&cpuctx->hlist_mutex);
+
+	return err;
+}
+
+static int swevent_hlist_get(struct perf_event *event)
+{
+	int err;
+	int cpu, failed_cpu;
+
+	if (event->cpu != -1)
+		return swevent_hlist_get_cpu(event, event->cpu);
+
+	get_online_cpus();
+	for_each_possible_cpu(cpu) {
+		err = swevent_hlist_get_cpu(event, cpu);
+		if (err) {
+			failed_cpu = cpu;
+			goto fail;
+		}
+	}
+	put_online_cpus();
+
+	return 0;
+fail:
+	for_each_possible_cpu(cpu) {
+		if (cpu == failed_cpu)
+			break;
+		swevent_hlist_put_cpu(event, cpu);
+	}
+
+	put_online_cpus();
+	return err;
+}
+
 #ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
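swevent_hlist_get() and swevent_hlist_put() implement a per-CPU, refcounted, lazily allocated table: the first user allocates the hlist under hlist_mutex, later users only bump hlist_refcount, and the last put hands the table to call_rcu() so lockless readers in do_perf_sw_event() stay safe. Stripped of the per-CPU, RCU and CPU-hotplug machinery, and with invented names, the same get/put pattern looks roughly like this in user-space C:

#include <pthread.h>
#include <stdlib.h>

/* Invented illustration of the get/put pattern above, not kernel code. */
struct lazy_table {
	pthread_mutex_t lock;	/* plays the role of hlist_mutex */
	int refcount;		/* plays the role of hlist_refcount */
	void *table;		/* plays the role of swevent_hlist */
};

static int lazy_table_get(struct lazy_table *t, size_t size)
{
	int err = 0;

	pthread_mutex_lock(&t->lock);
	if (!t->table) {
		t->table = calloc(1, size);	/* first user allocates */
		if (!t->table) {
			err = -1;		/* -ENOMEM in the kernel version */
			goto out;
		}
	}
	t->refcount++;
out:
	pthread_mutex_unlock(&t->lock);
	return err;
}

static void lazy_table_put(struct lazy_table *t)
{
	pthread_mutex_lock(&t->lock);
	if (!--t->refcount) {
		free(t->table);			/* the kernel defers this via call_rcu() */
		t->table = NULL;
	}
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct lazy_table t = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };

	if (lazy_table_get(&t, 256) == 0) {	/* first get allocates */
		lazy_table_get(&t, 256);	/* second get only bumps the count */
		lazy_table_put(&t);
		lazy_table_put(&t);		/* last put frees */
	}
	return 0;
}

The kernel version has the extra twist that a task-bound event (event->cpu == -1) needs a table on every possible CPU, which is why swevent_hlist_get() walks them all under get_online_cpus() and unwinds the already-taken references if any allocation fails.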
@@ -4357,10 +4466,13 @@ static int perf_tp_event_match(struct perf_event *event,
 static void tp_perf_event_destroy(struct perf_event *event)
 {
 	perf_trace_disable(event->attr.config);
+	swevent_hlist_put(event);
 }
 
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
+	int err;
+
 	/*
 	 * Raw tracepoint data is a severe data leak, only allow root to
 	 * have these.
@@ -4374,6 +4486,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 		return NULL;
 
 	event->destroy = tp_perf_event_destroy;
+	err = swevent_hlist_get(event);
+	if (err) {
+		perf_trace_disable(event->attr.config);
+		return ERR_PTR(err);
+	}
 
 	return &perf_ops_generic;
 }
@@ -4474,6 +4591,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 	WARN_ON(event->parent);
 
 	atomic_dec(&perf_swevent_enabled[event_id]);
+	swevent_hlist_put(event);
 }
 
 static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4512,6 +4630,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
 	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
 	case PERF_COUNT_SW_EMULATION_FAULTS:
 		if (!event->parent) {
+			int err;
+
+			err = swevent_hlist_get(event);
+			if (err)
+				return ERR_PTR(err);
+
 			atomic_inc(&perf_swevent_enabled[event_id]);
 			event->destroy = sw_perf_event_destroy;
 		}
@@ -5384,6 +5508,7 @@ static void __init perf_event_init_all_cpus(void)
 
 	for_each_possible_cpu(cpu) {
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		mutex_init(&cpuctx->hlist_mutex);
 		__perf_event_init_context(&cpuctx->ctx, NULL);
 	}
 }
@@ -5397,6 +5522,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	spin_lock(&perf_resource_lock);
 	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
 	spin_unlock(&perf_resource_lock);
+
+	mutex_lock(&cpuctx->hlist_mutex);
+	if (cpuctx->hlist_refcount > 0) {
+		struct swevent_hlist *hlist;
+
+		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+		WARN_ON_ONCE(!hlist);
+		rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+	}
+	mutex_unlock(&cpuctx->hlist_mutex);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5416,6 +5551,10 @@ static void perf_event_exit_cpu(int cpu)
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
+	mutex_lock(&cpuctx->hlist_mutex);
+	swevent_hlist_release(cpuctx);
+	mutex_unlock(&cpuctx->hlist_mutex);
+
 	mutex_lock(&ctx->mutex);
 	smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
 	mutex_unlock(&ctx->mutex);