about summary refs log tree commit diff stats
path: root/kernel/perf_event.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- kernel/perf_event.c 172
1 files changed, 141 insertions, 31 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 06682e7b12e2..83d8fd991c86 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
34 35
35#include <asm/irq_regs.h> 36#include <asm/irq_regs.h>
36 37
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,
674 675
675 event->tstamp_running += ctx->time - event->tstamp_stopped; 676 event->tstamp_running += ctx->time - event->tstamp_stopped;
676 677
678 event->shadow_ctx_time = ctx->time - ctx->timestamp;
679
677 if (!is_software_event(event)) 680 if (!is_software_event(event))
678 cpuctx->active_oncpu++; 681 cpuctx->active_oncpu++;
679 ctx->nr_active++; 682 ctx->nr_active++;
@@ -1284,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
1284{ 1287{
1285 int ctxn; 1288 int ctxn;
1286 1289
1287 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1288
1289 for_each_task_context_nr(ctxn) 1290 for_each_task_context_nr(ctxn)
1290 perf_event_context_sched_out(task, ctxn, next); 1291 perf_event_context_sched_out(task, ctxn, next);
1291} 1292}
@@ -1619,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
1619{ 1620{
1620 raw_spin_lock(&ctx->lock); 1621 raw_spin_lock(&ctx->lock);
1621 1622
1622 /* Rotate the first entry last of non-pinned groups */ 1623 /*
1623 list_rotate_left(&ctx->flexible_groups); 1624 * Rotate the first entry last of non-pinned groups. Rotation might be
1625 * disabled by the inheritance code.
1626 */
1627 if (!ctx->rotate_disable)
1628 list_rotate_left(&ctx->flexible_groups);
1624 1629
1625 raw_spin_unlock(&ctx->lock); 1630 raw_spin_unlock(&ctx->lock);
1626} 1631}
@@ -2232,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
2232 raw_spin_unlock_irq(&ctx->lock); 2237 raw_spin_unlock_irq(&ctx->lock);
2233 mutex_unlock(&ctx->mutex); 2238 mutex_unlock(&ctx->mutex);
2234 2239
2235 mutex_lock(&event->owner->perf_event_mutex);
2236 list_del_init(&event->owner_entry);
2237 mutex_unlock(&event->owner->perf_event_mutex);
2238 put_task_struct(event->owner);
2239
2240 free_event(event); 2240 free_event(event);
2241 2241
2242 return 0; 2242 return 0;
@@ -2249,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
2249static int perf_release(struct inode *inode, struct file *file) 2249static int perf_release(struct inode *inode, struct file *file)
2250{ 2250{
2251 struct perf_event *event = file->private_data; 2251 struct perf_event *event = file->private_data;
2252 struct task_struct *owner;
2252 2253
2253 file->private_data = NULL; 2254 file->private_data = NULL;
2254 2255
2256 rcu_read_lock();
2257 owner = ACCESS_ONCE(event->owner);
2258 /*
2259 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
2260 * !owner it means the list deletion is complete and we can indeed
2261 * free this event, otherwise we need to serialize on
2262 * owner->perf_event_mutex.
2263 */
2264 smp_read_barrier_depends();
2265 if (owner) {
2266 /*
2267 * Since delayed_put_task_struct() also drops the last
2268 * task reference we can safely take a new reference
2269 * while holding the rcu_read_lock().
2270 */
2271 get_task_struct(owner);
2272 }
2273 rcu_read_unlock();
2274
2275 if (owner) {
2276 mutex_lock(&owner->perf_event_mutex);
2277 /*
2278 * We have to re-check the event->owner field, if it is cleared
2279 * we raced with perf_event_exit_task(), acquiring the mutex
2280 * ensured they're done, and we can proceed with freeing the
2281 * event.
2282 */
2283 if (event->owner)
2284 list_del_init(&event->owner_entry);
2285 mutex_unlock(&owner->perf_event_mutex);
2286 put_task_struct(owner);
2287 }
2288
2255 return perf_event_release_kernel(event); 2289 return perf_event_release_kernel(event);
2256} 2290}
2257 2291
@@ -3396,7 +3430,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
3396} 3430}
3397 3431
3398static void perf_output_read_one(struct perf_output_handle *handle, 3432static void perf_output_read_one(struct perf_output_handle *handle,
3399 struct perf_event *event) 3433 struct perf_event *event,
3434 u64 enabled, u64 running)
3400{ 3435{
3401 u64 read_format = event->attr.read_format; 3436 u64 read_format = event->attr.read_format;
3402 u64 values[4]; 3437 u64 values[4];
@@ -3404,11 +3439,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3404 3439
3405 values[n++] = perf_event_count(event); 3440 values[n++] = perf_event_count(event);
3406 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 3441 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3407 values[n++] = event->total_time_enabled + 3442 values[n++] = enabled +
3408 atomic64_read(&event->child_total_time_enabled); 3443 atomic64_read(&event->child_total_time_enabled);
3409 } 3444 }
3410 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 3445 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
3411 values[n++] = event->total_time_running + 3446 values[n++] = running +
3412 atomic64_read(&event->child_total_time_running); 3447 atomic64_read(&event->child_total_time_running);
3413 } 3448 }
3414 if (read_format & PERF_FORMAT_ID) 3449 if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3456,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3421 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. 3456 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
3422 */ 3457 */
3423static void perf_output_read_group(struct perf_output_handle *handle, 3458static void perf_output_read_group(struct perf_output_handle *handle,
3424 struct perf_event *event) 3459 struct perf_event *event,
3460 u64 enabled, u64 running)
3425{ 3461{
3426 struct perf_event *leader = event->group_leader, *sub; 3462 struct perf_event *leader = event->group_leader, *sub;
3427 u64 read_format = event->attr.read_format; 3463 u64 read_format = event->attr.read_format;
@@ -3431,10 +3467,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3431 values[n++] = 1 + leader->nr_siblings; 3467 values[n++] = 1 + leader->nr_siblings;
3432 3468
3433 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 3469 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
3434 values[n++] = leader->total_time_enabled; 3470 values[n++] = enabled;
3435 3471
3436 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 3472 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
3437 values[n++] = leader->total_time_running; 3473 values[n++] = running;
3438 3474
3439 if (leader != event) 3475 if (leader != event)
3440 leader->pmu->read(leader); 3476 leader->pmu->read(leader);
@@ -3459,13 +3495,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3459 } 3495 }
3460} 3496}
3461 3497
3498#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
3499 PERF_FORMAT_TOTAL_TIME_RUNNING)
3500
3462static void perf_output_read(struct perf_output_handle *handle, 3501static void perf_output_read(struct perf_output_handle *handle,
3463 struct perf_event *event) 3502 struct perf_event *event)
3464{ 3503{
3504 u64 enabled = 0, running = 0, now, ctx_time;
3505 u64 read_format = event->attr.read_format;
3506
3507 /*
3508 * compute total_time_enabled, total_time_running
3509 * based on snapshot values taken when the event
3510 * was last scheduled in.
3511 *
3512 * we cannot simply call update_context_time()
3513 * because of locking issues, as we are called in
3514 * NMI context
3515 */
3516 if (read_format & PERF_FORMAT_TOTAL_TIMES) {
3517 now = perf_clock();
3518 ctx_time = event->shadow_ctx_time + now;
3519 enabled = ctx_time - event->tstamp_enabled;
3520 running = ctx_time - event->tstamp_running;
3521 }
3522
3465 if (event->attr.read_format & PERF_FORMAT_GROUP) 3523 if (event->attr.read_format & PERF_FORMAT_GROUP)
3466 perf_output_read_group(handle, event); 3524 perf_output_read_group(handle, event, enabled, running);
3467 else 3525 else
3468 perf_output_read_one(handle, event); 3526 perf_output_read_one(handle, event, enabled, running);
3469} 3527}
3470 3528
3471void perf_output_sample(struct perf_output_handle *handle, 3529void perf_output_sample(struct perf_output_handle *handle,
@@ -3766,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3766 rcu_read_lock(); 3824 rcu_read_lock();
3767 list_for_each_entry_rcu(pmu, &pmus, entry) { 3825 list_for_each_entry_rcu(pmu, &pmus, entry) {
3768 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 3826 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3827 if (cpuctx->active_pmu != pmu)
3828 goto next;
3769 perf_event_task_ctx(&cpuctx->ctx, task_event); 3829 perf_event_task_ctx(&cpuctx->ctx, task_event);
3770 3830
3771 ctx = task_event->task_ctx; 3831 ctx = task_event->task_ctx;
@@ -3901,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3901 rcu_read_lock(); 3961 rcu_read_lock();
3902 list_for_each_entry_rcu(pmu, &pmus, entry) { 3962 list_for_each_entry_rcu(pmu, &pmus, entry) {
3903 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 3963 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3964 if (cpuctx->active_pmu != pmu)
3965 goto next;
3904 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3966 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3905 3967
3906 ctxn = pmu->task_ctx_nr; 3968 ctxn = pmu->task_ctx_nr;
@@ -4086,6 +4148,8 @@ got_name:
4086 rcu_read_lock(); 4148 rcu_read_lock();
4087 list_for_each_entry_rcu(pmu, &pmus, entry) { 4149 list_for_each_entry_rcu(pmu, &pmus, entry) {
4088 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4150 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4151 if (cpuctx->active_pmu != pmu)
4152 goto next;
4089 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, 4153 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
4090 vma->vm_flags & VM_EXEC); 4154 vma->vm_flags & VM_EXEC);
4091 4155
@@ -4655,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event)
4655 break; 4719 break;
4656 } 4720 }
4657 4721
4658 if (event_id > PERF_COUNT_SW_MAX) 4722 if (event_id >= PERF_COUNT_SW_MAX)
4659 return -ENOENT; 4723 return -ENOENT;
4660 4724
4661 if (!event->parent) { 4725 if (!event->parent) {
@@ -5087,20 +5151,36 @@ static void *find_pmu_context(int ctxn)
5087 return NULL; 5151 return NULL;
5088} 5152}
5089 5153
5090static void free_pmu_context(void * __percpu cpu_context) 5154static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
5091{ 5155{
5092 struct pmu *pmu; 5156 int cpu;
5157
5158 for_each_possible_cpu(cpu) {
5159 struct perf_cpu_context *cpuctx;
5160
5161 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5162
5163 if (cpuctx->active_pmu == old_pmu)
5164 cpuctx->active_pmu = pmu;
5165 }
5166}
5167
5168static void free_pmu_context(struct pmu *pmu)
5169{
5170 struct pmu *i;
5093 5171
5094 mutex_lock(&pmus_lock); 5172 mutex_lock(&pmus_lock);
5095 /* 5173 /*
5096 * Like a real lame refcount. 5174 * Like a real lame refcount.
5097 */ 5175 */
5098 list_for_each_entry(pmu, &pmus, entry) { 5176 list_for_each_entry(i, &pmus, entry) {
5099 if (pmu->pmu_cpu_context == cpu_context) 5177 if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
5178 update_pmu_context(i, pmu);
5100 goto out; 5179 goto out;
5180 }
5101 } 5181 }
5102 5182
5103 free_percpu(cpu_context); 5183 free_percpu(pmu->pmu_cpu_context);
5104out: 5184out:
5105 mutex_unlock(&pmus_lock); 5185 mutex_unlock(&pmus_lock);
5106} 5186}
@@ -5132,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu)
5132 cpuctx->ctx.pmu = pmu; 5212 cpuctx->ctx.pmu = pmu;
5133 cpuctx->jiffies_interval = 1; 5213 cpuctx->jiffies_interval = 1;
5134 INIT_LIST_HEAD(&cpuctx->rotation_list); 5214 INIT_LIST_HEAD(&cpuctx->rotation_list);
5215 cpuctx->active_pmu = pmu;
5135 } 5216 }
5136 5217
5137got_cpu_context: 5218got_cpu_context:
@@ -5183,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu)
5183 synchronize_rcu(); 5264 synchronize_rcu();
5184 5265
5185 free_percpu(pmu->pmu_disable_count); 5266 free_percpu(pmu->pmu_disable_count);
5186 free_pmu_context(pmu->pmu_cpu_context); 5267 free_pmu_context(pmu);
5187} 5268}
5188 5269
5189struct pmu *perf_init_event(struct perf_event *event) 5270struct pmu *perf_init_event(struct perf_event *event)
@@ -5651,7 +5732,7 @@ SYSCALL_DEFINE5(perf_event_open,
5651 mutex_unlock(&ctx->mutex); 5732 mutex_unlock(&ctx->mutex);
5652 5733
5653 event->owner = current; 5734 event->owner = current;
5654 get_task_struct(current); 5735
5655 mutex_lock(&current->perf_event_mutex); 5736 mutex_lock(&current->perf_event_mutex);
5656 list_add_tail(&event->owner_entry, &current->perf_event_list); 5737 list_add_tail(&event->owner_entry, &current->perf_event_list);
5657 mutex_unlock(&current->perf_event_mutex); 5738 mutex_unlock(&current->perf_event_mutex);
@@ -5719,12 +5800,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5719 ++ctx->generation; 5800 ++ctx->generation;
5720 mutex_unlock(&ctx->mutex); 5801 mutex_unlock(&ctx->mutex);
5721 5802
5722 event->owner = current;
5723 get_task_struct(current);
5724 mutex_lock(&current->perf_event_mutex);
5725 list_add_tail(&event->owner_entry, &current->perf_event_list);
5726 mutex_unlock(&current->perf_event_mutex);
5727
5728 return event; 5803 return event;
5729 5804
5730err_free: 5805err_free:
@@ -5875,8 +5950,24 @@ again:
5875 */ 5950 */
5876void perf_event_exit_task(struct task_struct *child) 5951void perf_event_exit_task(struct task_struct *child)
5877{ 5952{
5953 struct perf_event *event, *tmp;
5878 int ctxn; 5954 int ctxn;
5879 5955
5956 mutex_lock(&child->perf_event_mutex);
5957 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
5958 owner_entry) {
5959 list_del_init(&event->owner_entry);
5960
5961 /*
5962 * Ensure the list deletion is visible before we clear
5963 * the owner, closes a race against perf_release() where
5964 * we need to serialize on the owner->perf_event_mutex.
5965 */
5966 smp_wmb();
5967 event->owner = NULL;
5968 }
5969 mutex_unlock(&child->perf_event_mutex);
5970
5880 for_each_task_context_nr(ctxn) 5971 for_each_task_context_nr(ctxn)
5881 perf_event_exit_task_context(child, ctxn); 5972 perf_event_exit_task_context(child, ctxn);
5882} 5973}
@@ -6096,6 +6187,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6096 struct perf_event *event; 6187 struct perf_event *event;
6097 struct task_struct *parent = current; 6188 struct task_struct *parent = current;
6098 int inherited_all = 1; 6189 int inherited_all = 1;
6190 unsigned long flags;
6099 int ret = 0; 6191 int ret = 0;
6100 6192
6101 child->perf_event_ctxp[ctxn] = NULL; 6193 child->perf_event_ctxp[ctxn] = NULL;
@@ -6136,6 +6228,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6136 break; 6228 break;
6137 } 6229 }
6138 6230
6231 /*
6232 * We can't hold ctx->lock when iterating the ->flexible_groups list due
6233 * to allocations, but we need to prevent rotation because
6234 * rotate_ctx() will change the list from interrupt context.
6235 */
6236 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6237 parent_ctx->rotate_disable = 1;
6238 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6239
6139 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { 6240 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
6140 ret = inherit_task_group(event, parent, parent_ctx, 6241 ret = inherit_task_group(event, parent, parent_ctx,
6141 child, ctxn, &inherited_all); 6242 child, ctxn, &inherited_all);
@@ -6143,6 +6244,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6143 break; 6244 break;
6144 } 6245 }
6145 6246
6247 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6248 parent_ctx->rotate_disable = 0;
6249 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6250
6146 child_ctx = child->perf_event_ctxp[ctxn]; 6251 child_ctx = child->perf_event_ctxp[ctxn];
6147 6252
6148 if (child_ctx && inherited_all) { 6253 if (child_ctx && inherited_all) {
@@ -6295,6 +6400,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6295 6400
6296void __init perf_event_init(void) 6401void __init perf_event_init(void)
6297{ 6402{
6403 int ret;
6404
6298 perf_event_init_all_cpus(); 6405 perf_event_init_all_cpus();
6299 init_srcu_struct(&pmus_srcu); 6406 init_srcu_struct(&pmus_srcu);
6300 perf_pmu_register(&perf_swevent); 6407 perf_pmu_register(&perf_swevent);
@@ -6302,4 +6409,7 @@ void __init perf_event_init(void)
6302 perf_pmu_register(&perf_task_clock); 6409 perf_pmu_register(&perf_task_clock);
6303 perf_tp_register(); 6410 perf_tp_register();
6304 perf_cpu_notifier(perf_cpu_notify); 6411 perf_cpu_notifier(perf_cpu_notify);
6412
6413 ret = init_hw_breakpoint();
6414 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6305} 6415}