Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c  135
-rw-r--r--  kernel/kprobes.c       13
2 files changed, 135 insertions, 13 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cf24b3e42ec..d8cb4d21a346 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
 	struct task_struct	*p;
 	int			(*func)(void *info);
@@ -119,6 +121,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 	return data.ret;
 }
 
+#define EVENT_OWNER_KERNEL ((void *) -1)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+	return event->owner == EVENT_OWNER_KERNEL;
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -1374,6 +1383,45 @@ out:
 	perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+	return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent but the parent's task has finished and it is
+ * alive only because of children holding a reference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+	return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+		return;
+
+	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+		get_ctx(ctx);
+		ctx->orphans_remove_sched = true;
+	}
+}
+
+static int __init perf_workqueue_init(void)
+{
+	perf_wq = create_singlethread_workqueue("perf");
+	WARN(!perf_wq, "failed to create perf workqueue\n");
+	return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
@@ -1423,6 +1471,9 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1725,6 +1776,9 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3067,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
+	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3312,16 +3367,12 @@ static void free_event(struct perf_event *event)
 }
 
 /*
- * Called when the last reference to the file is gone.
+ * Remove user event from the owner task.
  */
-static void put_event(struct perf_event *event)
+static void perf_remove_from_owner(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
-	if (!atomic_long_dec_and_test(&event->refcount))
-		return;
-
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
 	/*
@@ -3354,6 +3405,20 @@ static void put_event(struct perf_event *event)
 		mutex_unlock(&owner->perf_event_mutex);
 		put_task_struct(owner);
 	}
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static void put_event(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
+
+	if (!is_kernel_event(event))
+		perf_remove_from_owner(event);
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	/*
@@ -3388,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Remove all orphaned events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event, *tmp;
+
+	ctx = container_of(work, struct perf_event_context,
+			   orphans_remove.work);
+
+	mutex_lock(&ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+		struct perf_event *parent_event = event->parent;
+
+		if (!is_orphaned_child(event))
+			continue;
+
+		perf_remove_from_context(event, true);
+
+		mutex_lock(&parent_event->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent_event->child_mutex);
+
+		free_event(event);
+		put_event(parent_event);
+	}
+
+	raw_spin_lock_irq(&ctx->lock);
+	ctx->orphans_remove_sched = false;
+	raw_spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -3499,7 +3600,8 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	 * error state (i.e. because it was pinned but it couldn't be
 	 * scheduled on to the CPU at some point).
 	 */
-	if (event->state == PERF_EVENT_STATE_ERROR)
+	if ((event->state == PERF_EVENT_STATE_ERROR) ||
+	    (event->state == PERF_EVENT_STATE_EXIT))
 		return 0;
 
 	if (count < event->read_size)
@@ -3526,7 +3628,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_event *event = file->private_data;
 	struct ring_buffer *rb;
-	unsigned int events = POLL_HUP;
+	unsigned int events = POLLHUP;
+
+	poll_wait(file, &event->waitq, wait);
+
+	if (event->state == PERF_EVENT_STATE_EXIT)
+		return events;
 
 	/*
 	 * Pin the event->rb by taking event->mmap_mutex; otherwise
@@ -3537,9 +3644,6 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	if (rb)
 		events = atomic_xchg(&rb->poll, 0);
 	mutex_unlock(&event->mmap_mutex);
-
-	poll_wait(file, &event->waitq, wait);
-
 	return events;
 }
 
@@ -7366,6 +7470,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err;
 	}
 
+	/* Mark owner so we can distinguish it from user events. */
+	event->owner = EVENT_OWNER_KERNEL;
+
 	account_event(event);
 
 	ctx = find_get_context(event->pmu, task, cpu);
@@ -7486,6 +7593,9 @@ __perf_event_exit_task(struct perf_event *child_event,
 	if (child_event->parent) {
 		sync_child_event(child_event, child);
 		free_event(child_event);
+	} else {
+		child_event->state = PERF_EVENT_STATE_EXIT;
+		perf_event_wakeup(child_event);
 	}
 }
 
@@ -7689,7 +7799,8 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
-	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+	if (is_orphaned_event(parent_event) ||
+	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		free_event(child_event);
 		return NULL;
 	}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 734e9a7d280b..3995f546d0f3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1778,7 +1778,18 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 	unsigned long hash, flags = 0;
 	struct kretprobe_instance *ri;
 
-	/*TODO: consider to only swap the RA after the last pre_handler fired */
+	/*
+	 * To avoid deadlocks, prohibit return probing in NMI contexts,
+	 * just skip the probe and increase the (inexact) 'nmissed'
+	 * statistical counter, so that the user is informed that
+	 * something happened:
+	 */
+	if (unlikely(in_nmi())) {
+		rp->nmissed++;
+		return 0;
+	}
+
+	/* TODO: consider to only swap the RA after the last pre_handler fired */
 	hash = hash_ptr(current, KPROBE_HASH_BITS);
 	raw_spin_lock_irqsave(&rp->lock, flags);
 	if (!hlist_empty(&rp->free_instances)) {
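
The ownership checks this patch introduces (is_kernel_event(), is_orphaned_event(), is_orphaned_child()) can be illustrated with a minimal, hypothetical user-space sketch. The struct below is a drastically reduced stand-in for the kernel's struct perf_event, keeping only the owner and parent fields these predicates touch; the authoritative definitions are the ones in the diff above.

/*
 * Illustrative sketch only: a cut-down, hypothetical stand-in for
 * struct perf_event, with just enough state to exercise the
 * ownership predicates added by this patch.
 */
#include <stdbool.h>
#include <stdio.h>

#define EVENT_OWNER_KERNEL ((void *) -1)

struct perf_event {
	void *owner;			/* owning task, EVENT_OWNER_KERNEL, or NULL */
	struct perf_event *parent;	/* inherited-from event, or NULL */
};

/* Event was created in-kernel (perf_event_create_kernel_counter()). */
static bool is_kernel_event(struct perf_event *event)
{
	return event->owner == EVENT_OWNER_KERNEL;
}

/* User event whose owner task has gone away (owner cleared). */
static bool is_orphaned_event(struct perf_event *event)
{
	return event && !is_kernel_event(event) && !event->owner;
}

/* Child event kept alive only by its orphaned parent. */
static bool is_orphaned_child(struct perf_event *event)
{
	return is_orphaned_event(event->parent);
}

int main(void)
{
	struct perf_event kernel_ev = { .owner = EVENT_OWNER_KERNEL };
	struct perf_event orphan    = { .owner = NULL };
	struct perf_event child     = { .owner = NULL, .parent = &orphan };

	printf("kernel event orphaned? %d\n", is_orphaned_event(&kernel_ev)); /* 0 */
	printf("user event orphaned?   %d\n", is_orphaned_event(&orphan));    /* 1 */
	printf("child needs cleanup?   %d\n", is_orphaned_child(&child));     /* 1 */
	return 0;
}

Run under a plain C compiler, it prints 0, 1, 1: kernel-owned events are never treated as orphans, a user event with a cleared owner is, and its children become candidates for the delayed orphans_remove_work() cleanup scheduled in event_sched_in()/event_sched_out().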