diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-11-09 13:01:43 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-11-18 07:18:46 -0500 |
commit | 8882135bcd332f294df5455747ea43ba9e6f77ad (patch) | |
tree | 758f26d26750fd5007e885cc28125f7c1fa24ca6 /kernel/perf_event.c | |
parent | fcf48a725a176ba12aa7be64c50190deaa2f86df (diff) |
perf: Fix owner-list vs exit
Oleg noticed that a perf-fd keeping a reference on the creating task
leads to a few funny side effects.
There's two different aspects to this:
- kernel based perf-events, these should not take out
a reference on the creating task and appear on the task's
event list since they're not bound to fds nor visible
to userspace.
- fork() and pthread_create(), these can lead to the creating
task dying (and thus the task's event-list becomming useless)
but keeping the list and ref alive until the event is closed.
Combined they lead to malfunction of the ptrace hw_tracepoints.
Cure this by not considering kernel based perf_events for the
owner-list and destroying the owner-list when the owner dies.
Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Oleg Nesterov <oleg@redhat.com>
LKML-Reference: <1289576883.2084.286.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 63 |
1 files changed, 51 insertions, 12 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f818d9d2dc93..671f6c8c8a32 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -2235,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2235 | raw_spin_unlock_irq(&ctx->lock); | 2235 | raw_spin_unlock_irq(&ctx->lock); |
2236 | mutex_unlock(&ctx->mutex); | 2236 | mutex_unlock(&ctx->mutex); |
2237 | 2237 | ||
2238 | mutex_lock(&event->owner->perf_event_mutex); | ||
2239 | list_del_init(&event->owner_entry); | ||
2240 | mutex_unlock(&event->owner->perf_event_mutex); | ||
2241 | put_task_struct(event->owner); | ||
2242 | |||
2243 | free_event(event); | 2238 | free_event(event); |
2244 | 2239 | ||
2245 | return 0; | 2240 | return 0; |
@@ -2252,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2252 | static int perf_release(struct inode *inode, struct file *file) | 2247 | static int perf_release(struct inode *inode, struct file *file) |
2253 | { | 2248 | { |
2254 | struct perf_event *event = file->private_data; | 2249 | struct perf_event *event = file->private_data; |
2250 | struct task_struct *owner; | ||
2255 | 2251 | ||
2256 | file->private_data = NULL; | 2252 | file->private_data = NULL; |
2257 | 2253 | ||
2254 | rcu_read_lock(); | ||
2255 | owner = ACCESS_ONCE(event->owner); | ||
2256 | /* | ||
2257 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe | ||
2258 | * !owner it means the list deletion is complete and we can indeed | ||
2259 | * free this event, otherwise we need to serialize on | ||
2260 | * owner->perf_event_mutex. | ||
2261 | */ | ||
2262 | smp_read_barrier_depends(); | ||
2263 | if (owner) { | ||
2264 | /* | ||
2265 | * Since delayed_put_task_struct() also drops the last | ||
2266 | * task reference we can safely take a new reference | ||
2267 | * while holding the rcu_read_lock(). | ||
2268 | */ | ||
2269 | get_task_struct(owner); | ||
2270 | } | ||
2271 | rcu_read_unlock(); | ||
2272 | |||
2273 | if (owner) { | ||
2274 | mutex_lock(&owner->perf_event_mutex); | ||
2275 | /* | ||
2276 | * We have to re-check the event->owner field, if it is cleared | ||
2277 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
2278 | * ensured they're done, and we can proceed with freeing the | ||
2279 | * event. | ||
2280 | */ | ||
2281 | if (event->owner) | ||
2282 | list_del_init(&event->owner_entry); | ||
2283 | mutex_unlock(&owner->perf_event_mutex); | ||
2284 | put_task_struct(owner); | ||
2285 | } | ||
2286 | |||
2258 | return perf_event_release_kernel(event); | 2287 | return perf_event_release_kernel(event); |
2259 | } | 2288 | } |
2260 | 2289 | ||
@@ -5678,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5678 | mutex_unlock(&ctx->mutex); | 5707 | mutex_unlock(&ctx->mutex); |
5679 | 5708 | ||
5680 | event->owner = current; | 5709 | event->owner = current; |
5681 | get_task_struct(current); | 5710 | |
5682 | mutex_lock(¤t->perf_event_mutex); | 5711 | mutex_lock(¤t->perf_event_mutex); |
5683 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | 5712 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); |
5684 | mutex_unlock(¤t->perf_event_mutex); | 5713 | mutex_unlock(¤t->perf_event_mutex); |
@@ -5746,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5746 | ++ctx->generation; | 5775 | ++ctx->generation; |
5747 | mutex_unlock(&ctx->mutex); | 5776 | mutex_unlock(&ctx->mutex); |
5748 | 5777 | ||
5749 | event->owner = current; | ||
5750 | get_task_struct(current); | ||
5751 | mutex_lock(¤t->perf_event_mutex); | ||
5752 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
5753 | mutex_unlock(¤t->perf_event_mutex); | ||
5754 | |||
5755 | return event; | 5778 | return event; |
5756 | 5779 | ||
5757 | err_free: | 5780 | err_free: |
@@ -5902,8 +5925,24 @@ again: | |||
5902 | */ | 5925 | */ |
5903 | void perf_event_exit_task(struct task_struct *child) | 5926 | void perf_event_exit_task(struct task_struct *child) |
5904 | { | 5927 | { |
5928 | struct perf_event *event, *tmp; | ||
5905 | int ctxn; | 5929 | int ctxn; |
5906 | 5930 | ||
5931 | mutex_lock(&child->perf_event_mutex); | ||
5932 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
5933 | owner_entry) { | ||
5934 | list_del_init(&event->owner_entry); | ||
5935 | |||
5936 | /* | ||
5937 | * Ensure the list deletion is visible before we clear | ||
5938 | * the owner, closes a race against perf_release() where | ||
5939 | * we need to serialize on the owner->perf_event_mutex. | ||
5940 | */ | ||
5941 | smp_wmb(); | ||
5942 | event->owner = NULL; | ||
5943 | } | ||
5944 | mutex_unlock(&child->perf_event_mutex); | ||
5945 | |||
5907 | for_each_task_context_nr(ctxn) | 5946 | for_each_task_context_nr(ctxn) |
5908 | perf_event_exit_task_context(child, ctxn); | 5947 | perf_event_exit_task_context(child, ctxn); |
5909 | } | 5948 | } |