author     Jiri Olsa <jolsa@kernel.org>     2014-08-01 08:33:02 -0400
committer  Ingo Molnar <mingo@kernel.org>   2014-08-13 01:51:04 -0400
commit     fadfe7be6e50de7f03913833b33c56cd8fb66bac (patch)
tree       c1a4212e5509ac5390f1a6297dcafcebde12edd6
parent     f86977620ee4635f26befcf436700493a38ce002 (diff)
perf: Add queued work to remove orphaned child events
In cases where the owner task exits before the workload and the workload has forked, all the events stay around until the last workload process exits. That's because each child event holds a reference to its parent. We want to release all child events once the parent is gone, because at that point there is no process left to read them anyway, so they are just eating resources.

This removal races with process exit, which removes all events, and with fork, which clones events. To stay clear of both, add a work queue that removes orphaned child events for a context whenever such an event is detected.

A delayed work queue (with delay == 1) is used because this work is queued from perf scheduler callbacks; a normal work queue would try to wake up the worker right away, which deadlocks on rq->lock in this context.

Cloning from an abandoned parent event is also prevented.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1406896382-18404-4-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  include/linux/perf_event.h |  4
-rw-r--r--  kernel/events/core.c       | 87
2 files changed, 90 insertions(+), 1 deletion(-)
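For reference, the deferred-cleanup pattern the patch relies on, stripped to its essentials: a per-context delayed_work is queued with a non-zero delay so that the worker wakeup happens from the timer rather than from the caller, which is what makes it safe to call from perf's scheduler callbacks where rq->lock may be held. The sketch below uses hypothetical names (my_wq, my_ctx, my_schedule_cleanup) and is not code from the patch; locking around the cleanup_sched flag is omitted for brevity.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

struct my_ctx {
	struct delayed_work	cleanup;	/* deferred cleanup */
	bool			cleanup_sched;	/* cleanup already queued? */
};

static void my_cleanup_work(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, cleanup.work);

	/* ... release orphaned resources here ... */
	ctx->cleanup_sched = false;
}

static void my_ctx_init(struct my_ctx *ctx)
{
	INIT_DELAYED_WORK(&ctx->cleanup, my_cleanup_work);
	ctx->cleanup_sched = false;
}

/*
 * May be called from a scheduler callback: queue_delayed_work() with
 * delay > 0 only arms a timer here; the worker wakeup happens later in
 * timer context, so we never try to wake a process while rq->lock is held.
 */
static void my_schedule_cleanup(struct my_ctx *ctx)
{
	if (ctx->cleanup_sched || !my_wq)
		return;

	if (queue_delayed_work(my_wq, &ctx->cleanup, 1))
		ctx->cleanup_sched = true;
}

static int __init my_wq_init(void)
{
	my_wq = create_singlethread_workqueue("my_wq");
	return my_wq ? 0 : -ENOMEM;
}
core_initcall(my_wq_init);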
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a8c0f6..ef5b62bdb103 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,6 +52,7 @@ struct perf_guest_info_callbacks {
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
+#include <linux/workqueue.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -507,6 +508,9 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	int				nr_branch_stack; /* branch_stack evt */
 	struct rcu_head			rcu_head;
+
+	struct delayed_work		orphans_remove;
+	bool				orphans_remove_sched;
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bbb3ca22f07c..a25460559b4f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
 	struct task_struct	*p;
 	int			(*func)(void *info);
@@ -1381,6 +1383,45 @@ out:
 	perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+	return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent but parent's task finished and it's
+ * alive only because of children holding reference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+	return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+		return;
+
+	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+		get_ctx(ctx);
+		ctx->orphans_remove_sched = true;
+	}
+}
+
+static int __init perf_workqueue_init(void)
+{
+	perf_wq = create_singlethread_workqueue("perf");
+	WARN(!perf_wq, "failed to create perf workqueue\n");
+	return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
@@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
+	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Remove all orphaned events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event, *tmp;
+
+	ctx = container_of(work, struct perf_event_context,
+			   orphans_remove.work);
+
+	mutex_lock(&ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+		struct perf_event *parent_event = event->parent;
+
+		if (!is_orphaned_child(event))
+			continue;
+
+		perf_remove_from_context(event, true);
+
+		mutex_lock(&parent_event->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent_event->child_mutex);
+
+		free_event(event);
+		put_event(parent_event);
+	}
+
+	raw_spin_lock_irq(&ctx->lock);
+	ctx->orphans_remove_sched = false;
+	raw_spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -7709,7 +7793,8 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
-	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+	if (is_orphaned_event(parent_event) ||
+	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		free_event(child_event);
 		return NULL;
 	}
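For context, the situation the commit message describes can be reproduced from userspace roughly as follows: an owner task opens an inherited counter on a workload, the workload forks, and the owner exits first. The program below is an illustrative sketch, not part of the patch; it uses only the standard perf_event_open(2) syscall interface.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	pid_t workload;
	int fd;

	workload = fork();
	if (workload == 0) {
		/* Workload: give the owner time to attach, then fork. */
		sleep(1);
		for (int i = 0; i < 4; i++)
			if (fork() == 0)
				break;
		sleep(60);		/* keep the child events alive */
		_exit(0);
	}

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.inherit = 1;		/* forked children get cloned child events */

	/* Owner task: counts the workload and all of its children. */
	fd = syscall(__NR_perf_event_open, &attr, workload, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/*
	 * The owner exits here without waiting for the workload.  Before
	 * this patch the cloned child events (and, through their
	 * references, the parent event) stayed around until the last
	 * workload process exited, even though no task was left to read
	 * them; with the patch they are torn down by orphans_remove_work().
	 */
	return 0;
}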