Diffstat (limited to 'kernel/events/core.c')

 kernel/events/core.c | 158 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 144 insertions(+), 14 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b1c663593f5c..385f11d94105 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -47,6 +47,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
        struct task_struct      *p;
        int                     (*func)(void *info);
@@ -120,6 +122,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
        return data.ret;
 }
 
+#define EVENT_OWNER_KERNEL ((void *) -1)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+       return event->owner == EVENT_OWNER_KERNEL;
+}
+
 #define PERF_FLAG_ALL  (PERF_FLAG_FD_NO_GROUP |\
                         PERF_FLAG_FD_OUTPUT  |\
                         PERF_FLAG_PID_CGROUP |\
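
Note on the hunk above: event->owner normally points at the task_struct that created the event and is cleared to NULL once that task exits. The (void *)-1 sentinel adds a third, unambiguous value for kernel-created events. A minimal sketch of the resulting classification (the helper below is illustrative, not part of the patch):

    /* Illustrative only: the three owner states after this change. */
    static const char *owner_kind(struct perf_event *event)
    {
            if (is_kernel_event(event))
                    return "kernel";   /* owner == EVENT_OWNER_KERNEL */
            if (!event->owner)
                    return "orphaned"; /* owner task already exited */
            return "user";             /* owner is the creating task */
    }
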
@@ -1370,6 +1379,45 @@ out:
                perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+       return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent, but the parent's task finished and it's
+ * alive only because of children holding a reference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+       return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+       if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+               return;
+
+       if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+               get_ctx(ctx);
+               ctx->orphans_remove_sched = true;
+       }
+}
+
+static int __init perf_workqueue_init(void)
+{
+       perf_wq = create_singlethread_workqueue("perf");
+       WARN(!perf_wq, "failed to create perf workqueue\n");
+       return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
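
The helpers above assume two new fields on struct perf_event_context — the delayed work item and a flag that prevents double-queueing — which the full patch would add in include/linux/perf_event.h (not shown in this file's diff). A sketch of the assumed additions:

    /* Assumed companion change in include/linux/perf_event.h. */
    struct perf_event_context {
            /* ... existing fields ... */
            struct delayed_work     orphans_remove;       /* runs orphans_remove_work() on perf_wq */
            bool                    orphans_remove_sched; /* true while the work is queued */
    };
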
@@ -1419,6 +1467,9 @@ event_sched_out(struct perf_event *event,
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 
+       if (is_orphaned_child(event))
+               schedule_orphans_remove(ctx);
+
        perf_pmu_enable(event->pmu);
 }
 
@@ -1726,6 +1777,9 @@ event_sched_in(struct perf_event *event,
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
+       if (is_orphaned_child(event))
+               schedule_orphans_remove(ctx);
+
 out:
        perf_pmu_enable(event->pmu);
 
@@ -2326,7 +2380,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
        next_parent = rcu_dereference(next_ctx->parent_ctx);
 
        /* If neither context have a parent context; they cannot be clones. */
-       if (!parent || !next_parent)
+       if (!parent && !next_parent)
                goto unlock;
 
        if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
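
The one-character fix above matters because the clone test that follows also accepts next_parent == ctx and next_ctx == parent, i.e. pairs where only one of the two contexts has a parent pointer. The old `||` bailed out as soon as either parent was NULL and so missed those pairs; only when both parents are NULL (`&&`) is it safe to conclude the contexts cannot be clones.
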
@@ -3073,6 +3127,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
        INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
+       INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3318,16 +3373,12 @@ static void free_event(struct perf_event *event)
 }
 
 /*
- * Called when the last reference to the file is gone.
+ * Remove user event from the owner task.
  */
-static void put_event(struct perf_event *event)
+static void perf_remove_from_owner(struct perf_event *event)
 {
-       struct perf_event_context *ctx = event->ctx;
        struct task_struct *owner;
 
-       if (!atomic_long_dec_and_test(&event->refcount))
-               return;
-
        rcu_read_lock();
        owner = ACCESS_ONCE(event->owner);
        /*
@@ -3360,6 +3411,20 @@ static void put_event(struct perf_event *event)
                mutex_unlock(&owner->perf_event_mutex);
                put_task_struct(owner);
        }
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static void put_event(struct perf_event *event)
+{
+       struct perf_event_context *ctx = event->ctx;
+
+       if (!atomic_long_dec_and_test(&event->refcount))
+               return;
+
+       if (!is_kernel_event(event))
+               perf_remove_from_owner(event);
 
        WARN_ON_ONCE(ctx->parent_ctx);
        /*
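
The is_kernel_event() guard in the new put_event() is what the EVENT_OWNER_KERNEL sentinel buys: for kernel events, event->owner is not a real task_struct, so perf_remove_from_owner() must never dereference it; kernel counters also have no owner's event list to unlink from.
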
@@ -3394,6 +3459,42 @@ static int perf_release(struct inode *inode, struct file *file)
        return 0;
 }
 
+/*
+ * Remove all orphaned events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+       struct perf_event_context *ctx;
+       struct perf_event *event, *tmp;
+
+       ctx = container_of(work, struct perf_event_context,
+                          orphans_remove.work);
+
+       mutex_lock(&ctx->mutex);
+       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+               struct perf_event *parent_event = event->parent;
+
+               if (!is_orphaned_child(event))
+                       continue;
+
+               perf_remove_from_context(event, true);
+
+               mutex_lock(&parent_event->child_mutex);
+               list_del_init(&event->child_list);
+               mutex_unlock(&parent_event->child_mutex);
+
+               free_event(event);
+               put_event(parent_event);
+       }
+
+       raw_spin_lock_irq(&ctx->lock);
+       ctx->orphans_remove_sched = false;
+       raw_spin_unlock_irq(&ctx->lock);
+       mutex_unlock(&ctx->mutex);
+
+       put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
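
Note the reference choreography: schedule_orphans_remove() took a context reference (get_ctx) and latched ctx->orphans_remove_sched when it queued this work; the handler drops both once the sweep finishes, so a later event_sched_out() of a still-orphaned child can schedule the next pass.
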
@@ -3491,6 +3592,19 @@ static int perf_event_read_one(struct perf_event *event,
        return n * sizeof(u64);
 }
 
+static bool is_event_hup(struct perf_event *event)
+{
+       bool no_children;
+
+       if (event->state != PERF_EVENT_STATE_EXIT)
+               return false;
+
+       mutex_lock(&event->child_mutex);
+       no_children = list_empty(&event->child_list);
+       mutex_unlock(&event->child_mutex);
+       return no_children;
+}
+
 /*
  * Read the performance event - simple non blocking version for now
  */
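
An event counts as hung up only when it has reached PERF_EVENT_STATE_EXIT and no children remain on child_list; child_mutex is taken because __perf_event_exit_task() and inherit_event() modify that list concurrently. Together with the perf_poll() change below, this is what turns a dead event into a POLLHUP for userspace.
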
@@ -3532,7 +3646,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
        struct perf_event *event = file->private_data;
        struct ring_buffer *rb;
-       unsigned int events = POLL_HUP;
+       unsigned int events = POLLHUP;
+
+       poll_wait(file, &event->waitq, wait);
+
+       if (is_event_hup(event))
+               return events;
 
        /*
         * Pin the event->rb by taking event->mmap_mutex; otherwise
@@ -3543,9 +3662,6 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
        if (rb)
                events = atomic_xchg(&rb->poll, 0);
        mutex_unlock(&event->mmap_mutex);
-
-       poll_wait(file, &event->waitq, wait);
-
        return events;
 }
 
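
Two fixes land in perf_poll(): POLL_HUP is a SIGPOLL si_code constant, not a poll(2) event bit, so the initializer now uses the intended POLLHUP mask; and poll_wait() moves ahead of the early return so the caller is always registered on the waitqueue and gets woken when perf_event_wakeup() fires. A hypothetical userspace consumer of the new behavior (function name and structure are illustrative):

    #include <poll.h>

    /* Wait until the kernel hangs up the event fd, i.e. the monitored
     * task exited and the event reached PERF_EVENT_STATE_EXIT. */
    static int wait_for_event_exit(int perf_fd)
    {
            struct pollfd pfd = { .fd = perf_fd, .events = POLLIN };

            for (;;) {
                    if (poll(&pfd, 1, -1) < 0)
                            return -1;
                    if (pfd.revents & POLLHUP)
                            return 0;
                    /* POLLIN: ring-buffer data available; drain and retry. */
            }
    }
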
@@ -5809,7 +5925,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
        if (!hlist)
                return;
 
-       rcu_assign_pointer(swhash->swevent_hlist, NULL);
+       RCU_INIT_POINTER(swhash->swevent_hlist, NULL);
        kfree_rcu(hlist, rcu_head);
 }
 
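
rcu_assign_pointer() exists to order a structure's initialization before publication of the pointer, which costs a write barrier; storing NULL publishes nothing, so the plain RCU_INIT_POINTER() store is sufficient here and avoids the barrier.
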
@@ -7392,6 +7508,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
                goto err;
        }
 
+       /* Mark owner so we can distinguish it from user events. */
+       event->owner = EVENT_OWNER_KERNEL;
+
        account_event(event);
 
        ctx = find_get_context(event->pmu, task, cpu);
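
For context, a sketch of an in-kernel caller of the function this hunk touches; any event created this way now carries the kernel-owner mark, so put_event() skips the owner-task bookkeeping. The wrapper below is illustrative, not from the patch:

    #include <linux/perf_event.h>

    /* Count CPU cycles on one CPU from inside the kernel. */
    static struct perf_event *create_cycles_counter(int cpu)
    {
            struct perf_event_attr attr = {
                    .type   = PERF_TYPE_HARDWARE,
                    .config = PERF_COUNT_HW_CPU_CYCLES,
                    .size   = sizeof(attr),
            };

            /* task == NULL: per-CPU counter; no overflow handler is
             * needed for a counting-only event. */
            return perf_event_create_kernel_counter(&attr, cpu, NULL,
                                                    NULL, NULL);
    }
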
@@ -7479,6 +7598,12 @@ static void sync_child_event(struct perf_event *child_event,
        mutex_unlock(&parent_event->child_mutex);
 
        /*
+        * Make sure user/parent gets notified that we
+        * just lost one event.
+        */
+       perf_event_wakeup(parent_event);
+
+       /*
         * Release the parent event, if this was the last
         * reference to it.
         */
@@ -7512,6 +7637,9 @@ __perf_event_exit_task(struct perf_event *child_event,
        if (child_event->parent) {
                sync_child_event(child_event, child);
                free_event(child_event);
+       } else {
+               child_event->state = PERF_EVENT_STATE_EXIT;
+               perf_event_wakeup(child_event);
        }
 }
 
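
PERF_EVENT_STATE_EXIT, used here and in is_event_hup(), is a state the complete patch would have to add to enum perf_event_active_state in include/linux/perf_event.h (not part of this file's diff). A sketch of the assumed enum, with EXIT below INACTIVE so the `>= PERF_EVENT_STATE_INACTIVE` checks keep excluding it:

    /* Assumed companion change in include/linux/perf_event.h. */
    enum perf_event_active_state {
            PERF_EVENT_STATE_EXIT           = -3, /* task died, fd still open */
            PERF_EVENT_STATE_ERROR          = -2,
            PERF_EVENT_STATE_OFF            = -1,
            PERF_EVENT_STATE_INACTIVE       =  0,
            PERF_EVENT_STATE_ACTIVE         =  1,
    };
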
@@ -7695,6 +7823,7 @@ inherit_event(struct perf_event *parent_event,
                   struct perf_event *group_leader,
                   struct perf_event_context *child_ctx)
 {
+       enum perf_event_active_state parent_state = parent_event->state;
        struct perf_event *child_event;
        unsigned long flags;
 
@@ -7715,7 +7844,8 @@ inherit_event(struct perf_event *parent_event,
        if (IS_ERR(child_event))
                return child_event;
 
-       if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+       if (is_orphaned_event(parent_event) ||
+           !atomic_long_inc_not_zero(&parent_event->refcount)) {
                free_event(child_event);
                return NULL;
        }
@@ -7727,7 +7857,7 @@ inherit_event(struct perf_event *parent_event,
         * not its attr.disabled bit.  We hold the parent's mutex,
         * so we won't race with perf_event_{en, dis}able_family.
         */
-       if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
+       if (parent_state >= PERF_EVENT_STATE_INACTIVE)
                child_event->state = PERF_EVENT_STATE_INACTIVE;
        else
                child_event->state = PERF_EVENT_STATE_OFF;
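
Both hunks in inherit_event() work from the parent_state snapshot taken at function entry: once a parent can flip to PERF_EVENT_STATE_EXIT while its children are being cloned, re-reading parent_event->state here could observe the post-exit value and wrongly start the child in the OFF state; sampling the state once keeps the orphan check and the initial-state decision consistent.
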