Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c  135
-rw-r--r--  kernel/kprobes.c       13
2 files changed, 135 insertions, 13 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cf24b3e42ec..d8cb4d21a346 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
 	struct task_struct	*p;
 	int			(*func)(void *info);
@@ -119,6 +121,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 	return data.ret;
 }
 
+#define EVENT_OWNER_KERNEL ((void *) -1)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+	return event->owner == EVENT_OWNER_KERNEL;
+}
+
 #define PERF_FLAG_ALL	(PERF_FLAG_FD_NO_GROUP |\
 			 PERF_FLAG_FD_OUTPUT  |\
 			 PERF_FLAG_PID_CGROUP |\
@@ -1374,6 +1383,45 @@ out:
 	perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+	return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent, but the parent's task has finished and the
+ * parent stays alive only because its children hold a reference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+	return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+		return;
+
+	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+		get_ctx(ctx);
+		ctx->orphans_remove_sched = true;
+	}
+}
+
+static int __init perf_workqueue_init(void)
+{
+	perf_wq = create_singlethread_workqueue("perf");
+	WARN(!perf_wq, "failed to create perf workqueue\n");
+	return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
@@ -1423,6 +1471,9 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1725,6 +1776,9 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3067,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
+	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3312,16 +3367,12 @@ static void free_event(struct perf_event *event)
 }
 
 /*
- * Called when the last reference to the file is gone.
+ * Remove user event from the owner task.
  */
-static void put_event(struct perf_event *event)
+static void perf_remove_from_owner(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
-	if (!atomic_long_dec_and_test(&event->refcount))
-		return;
-
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
 	/*
@@ -3354,6 +3405,20 @@ static void put_event(struct perf_event *event)
 		mutex_unlock(&owner->perf_event_mutex);
 		put_task_struct(owner);
 	}
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static void put_event(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
+
+	if (!is_kernel_event(event))
+		perf_remove_from_owner(event);
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	/*
@@ -3388,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Remove all orphaned events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event, *tmp;
+
+	ctx = container_of(work, struct perf_event_context,
+			   orphans_remove.work);
+
+	mutex_lock(&ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+		struct perf_event *parent_event = event->parent;
+
+		if (!is_orphaned_child(event))
+			continue;
+
+		perf_remove_from_context(event, true);
+
+		mutex_lock(&parent_event->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent_event->child_mutex);
+
+		free_event(event);
+		put_event(parent_event);
+	}
+
+	raw_spin_lock_irq(&ctx->lock);
+	ctx->orphans_remove_sched = false;
+	raw_spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -3499,7 +3600,8 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	 * error state (i.e. because it was pinned but it couldn't be
 	 * scheduled on to the CPU at some point).
 	 */
-	if (event->state == PERF_EVENT_STATE_ERROR)
+	if ((event->state == PERF_EVENT_STATE_ERROR) ||
+	    (event->state == PERF_EVENT_STATE_EXIT))
 		return 0;
 
 	if (count < event->read_size)
@@ -3526,7 +3628,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_event *event = file->private_data;
 	struct ring_buffer *rb;
-	unsigned int events = POLL_HUP;
+	unsigned int events = POLLHUP;
+
+	poll_wait(file, &event->waitq, wait);
+
+	if (event->state == PERF_EVENT_STATE_EXIT)
+		return events;
 
 	/*
 	 * Pin the event->rb by taking event->mmap_mutex; otherwise
@@ -3537,9 +3644,6 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	if (rb)
 		events = atomic_xchg(&rb->poll, 0);
 	mutex_unlock(&event->mmap_mutex);
-
-	poll_wait(file, &event->waitq, wait);
-
 	return events;
 }
 
@@ -7366,6 +7470,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err;
 	}
 
+	/* Mark owner so we could distinguish it from user events. */
+	event->owner = EVENT_OWNER_KERNEL;
+
 	account_event(event);
 
 	ctx = find_get_context(event->pmu, task, cpu);
@@ -7486,6 +7593,9 @@ __perf_event_exit_task(struct perf_event *child_event,
 	if (child_event->parent) {
 		sync_child_event(child_event, child);
 		free_event(child_event);
+	} else {
+		child_event->state = PERF_EVENT_STATE_EXIT;
+		perf_event_wakeup(child_event);
 	}
 }
 
@@ -7689,7 +7799,8 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
-	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+	if (is_orphaned_event(parent_event) ||
+	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		free_event(child_event);
 		return NULL;
 	}
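
The core.c changes above hinge on one pattern: each perf_event_context embeds a delayed_work (ctx->orphans_remove) that is queued on a private single-threaded workqueue whenever an orphaned child is scheduled in or out, with ctx->orphans_remove_sched guarding against double-queueing. Below is a minimal stand-alone sketch of that deferred-cleanup pattern, assuming hypothetical names (example_wq, example_ctx, example_*); only the workqueue API calls mirror the ones used in the patch.

/*
 * Hypothetical sketch (not from the patch): a private workqueue plus a
 * delayed_work embedded in a context object, queued at most once at a
 * time, the same shape as perf_wq / ctx->orphans_remove above.
 */
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

struct example_ctx {
	raw_spinlock_t		lock;
	bool			cleanup_sched;
	struct delayed_work	cleanup_work;
};

static void example_cleanup_work(struct work_struct *work)
{
	/* Recover the owning context from the embedded work item. */
	struct example_ctx *ctx = container_of(work, struct example_ctx,
					       cleanup_work.work);

	/* ... walk the context and release whatever became unreachable ... */

	raw_spin_lock_irq(&ctx->lock);
	ctx->cleanup_sched = false;		/* allow re-queueing */
	raw_spin_unlock_irq(&ctx->lock);
}

static void example_ctx_init(struct example_ctx *ctx)
{
	raw_spin_lock_init(&ctx->lock);
	ctx->cleanup_sched = false;
	INIT_DELAYED_WORK(&ctx->cleanup_work, example_cleanup_work);
}

static void example_schedule_cleanup(struct example_ctx *ctx)
{
	if (ctx->cleanup_sched || !example_wq)
		return;

	/* queue_delayed_work() returns false if the work is already pending. */
	if (queue_delayed_work(example_wq, &ctx->cleanup_work, 1))
		ctx->cleanup_sched = true;
}

static int __init example_wq_init(void)
{
	example_wq = create_singlethread_workqueue("example");
	return example_wq ? 0 : -ENOMEM;
}
core_initcall(example_wq_init);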
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 734e9a7d280b..3995f546d0f3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1778,7 +1778,18 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 	unsigned long hash, flags = 0;
 	struct kretprobe_instance *ri;
 
-	/*TODO: consider to only swap the RA after the last pre_handler fired */
+	/*
+	 * To avoid deadlocks, prohibit return probing in NMI contexts,
+	 * just skip the probe and increase the (inexact) 'nmissed'
+	 * statistical counter, so that the user is informed that
+	 * something happened:
+	 */
+	if (unlikely(in_nmi())) {
+		rp->nmissed++;
+		return 0;
+	}
+
+	/* TODO: consider to only swap the RA after the last pre_handler fired */
 	hash = hash_ptr(current, KPROBE_HASH_BITS);
 	raw_spin_lock_irqsave(&rp->lock, flags);
 	if (!hlist_empty(&rp->free_instances)) {
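
The kprobes.c hunk makes pre_handler_kretprobe() refuse to run in NMI context: taking rp->lock there could deadlock against an interrupted lock holder, so the hit is simply counted in rp->nmissed. That counter is visible to whoever registered the kretprobe; a hypothetical consumer might look like the sketch below (the probed symbol and all example_* names are illustrative, not part of the patch).

/*
 * Hypothetical module (not from the patch) registering a kretprobe;
 * hits that pre_handler_kretprobe() skips (e.g. in NMI context) are
 * reported via the kretprobe's nmissed counter.
 */
#include <linux/module.h>
#include <linux/kprobes.h>

/* Runs when the probed function returns. */
static int example_ret_handler(struct kretprobe_instance *ri,
			       struct pt_regs *regs)
{
	pr_info("probed function returned %lu\n",
		(unsigned long)regs_return_value(regs));
	return 0;
}

static struct kretprobe example_krp = {
	.handler	= example_ret_handler,
	.kp.symbol_name	= "do_fork",	/* illustrative target only */
	.maxactive	= 20,
};

static int __init example_init(void)
{
	return register_kretprobe(&example_krp);
}

static void __exit example_exit(void)
{
	unregister_kretprobe(&example_krp);
	/* Hits the kernel could not service, including NMI-context skips. */
	pr_info("missed %d kretprobe hits\n", example_krp.nmissed);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");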