diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-05-27 09:47:49 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-31 02:46:09 -0400 |
commit | 8a49542c0554af7d0073aac0ee73ee65b807ef34 (patch) | |
tree | 73e82ee5a624012ac723c3af21bb1945b7bd675f /kernel/perf_event.c | |
parent | ac9721f3f54b27a16c7e1afb2481e7ee95a70318 (diff) |
perf_events: Fix races in group composition
Group siblings don't pin each-other or the parent, so when we destroy
events we must make sure to clean up all cross referencing pointers.
In particular, for destruction of a group leader we must be able to
find all its siblings and remove their reference to it.
This means that detaching an event from its context must not detach it
from the group, otherwise we can end up failing to clear all pointers.
Solve this by clearly separating the attachment to a context and
attachment to a group, and keep the group composed until we destroy
the events.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 91 |
1 files changed, 67 insertions, 24 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 848d49a043e9..10a1aee2309e 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | |||
283 | static void | 283 | static void |
284 | list_add_event(struct perf_event *event, struct perf_event_context *ctx) | 284 | list_add_event(struct perf_event *event, struct perf_event_context *ctx) |
285 | { | 285 | { |
286 | struct perf_event *group_leader = event->group_leader; | 286 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); |
287 | event->attach_state |= PERF_ATTACH_CONTEXT; | ||
287 | 288 | ||
288 | /* | 289 | /* |
289 | * Depending on whether it is a standalone or sibling event, | 290 | * If we're a stand alone event or group leader, we go to the context |
290 | * add it straight to the context's event list, or to the group | 291 | * list, group events are kept attached to the group so that |
291 | * leader's sibling list: | 292 | * perf_group_detach can, at all times, locate all siblings. |
292 | */ | 293 | */ |
293 | if (group_leader == event) { | 294 | if (event->group_leader == event) { |
294 | struct list_head *list; | 295 | struct list_head *list; |
295 | 296 | ||
296 | if (is_software_event(event)) | 297 | if (is_software_event(event)) |
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
298 | 299 | ||
299 | list = ctx_group_list(event, ctx); | 300 | list = ctx_group_list(event, ctx); |
300 | list_add_tail(&event->group_entry, list); | 301 | list_add_tail(&event->group_entry, list); |
301 | } else { | ||
302 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
303 | !is_software_event(event)) | ||
304 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
305 | |||
306 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | ||
307 | group_leader->nr_siblings++; | ||
308 | } | 302 | } |
309 | 303 | ||
310 | list_add_rcu(&event->event_entry, &ctx->event_list); | 304 | list_add_rcu(&event->event_entry, &ctx->event_list); |
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
313 | ctx->nr_stat++; | 307 | ctx->nr_stat++; |
314 | } | 308 | } |
315 | 309 | ||
310 | static void perf_group_attach(struct perf_event *event) | ||
311 | { | ||
312 | struct perf_event *group_leader = event->group_leader; | ||
313 | |||
314 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP); | ||
315 | event->attach_state |= PERF_ATTACH_GROUP; | ||
316 | |||
317 | if (group_leader == event) | ||
318 | return; | ||
319 | |||
320 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
321 | !is_software_event(event)) | ||
322 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
323 | |||
324 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | ||
325 | group_leader->nr_siblings++; | ||
326 | } | ||
327 | |||
316 | /* | 328 | /* |
317 | * Remove a event from the lists for its context. | 329 | * Remove a event from the lists for its context. |
318 | * Must be called with ctx->mutex and ctx->lock held. | 330 | * Must be called with ctx->mutex and ctx->lock held. |
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
320 | static void | 332 | static void |
321 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 333 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
322 | { | 334 | { |
323 | if (list_empty(&event->group_entry)) | 335 | /* |
336 | * We can have double detach due to exit/hot-unplug + close. | ||
337 | */ | ||
338 | if (!(event->attach_state & PERF_ATTACH_CONTEXT)) | ||
324 | return; | 339 | return; |
340 | |||
341 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | ||
342 | |||
325 | ctx->nr_events--; | 343 | ctx->nr_events--; |
326 | if (event->attr.inherit_stat) | 344 | if (event->attr.inherit_stat) |
327 | ctx->nr_stat--; | 345 | ctx->nr_stat--; |
328 | 346 | ||
329 | list_del_init(&event->group_entry); | ||
330 | list_del_rcu(&event->event_entry); | 347 | list_del_rcu(&event->event_entry); |
331 | 348 | ||
332 | if (event->group_leader != event) | 349 | if (event->group_leader == event) |
333 | event->group_leader->nr_siblings--; | 350 | list_del_init(&event->group_entry); |
334 | 351 | ||
335 | update_group_times(event); | 352 | update_group_times(event); |
336 | 353 | ||
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
345 | event->state = PERF_EVENT_STATE_OFF; | 362 | event->state = PERF_EVENT_STATE_OFF; |
346 | } | 363 | } |
347 | 364 | ||
348 | static void | 365 | static void perf_group_detach(struct perf_event *event) |
349 | perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx) | ||
350 | { | 366 | { |
351 | struct perf_event *sibling, *tmp; | 367 | struct perf_event *sibling, *tmp; |
368 | struct list_head *list = NULL; | ||
369 | |||
370 | /* | ||
371 | * We can have double detach due to exit/hot-unplug + close. | ||
372 | */ | ||
373 | if (!(event->attach_state & PERF_ATTACH_GROUP)) | ||
374 | return; | ||
375 | |||
376 | event->attach_state &= ~PERF_ATTACH_GROUP; | ||
377 | |||
378 | /* | ||
379 | * If this is a sibling, remove it from its group. | ||
380 | */ | ||
381 | if (event->group_leader != event) { | ||
382 | list_del_init(&event->group_entry); | ||
383 | event->group_leader->nr_siblings--; | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | if (!list_empty(&event->group_entry)) | ||
388 | list = &event->group_entry; | ||
352 | 389 | ||
353 | /* | 390 | /* |
354 | * If this was a group event with sibling events then | 391 | * If this was a group event with sibling events then |
355 | * upgrade the siblings to singleton events by adding them | 392 | * upgrade the siblings to singleton events by adding them |
356 | * to the context list directly: | 393 | * to whatever list we are on. |
357 | */ | 394 | */ |
358 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { | 395 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { |
359 | struct list_head *list; | 396 | if (list) |
360 | 397 | list_move_tail(&sibling->group_entry, list); | |
361 | list = ctx_group_list(event, ctx); | ||
362 | list_move_tail(&sibling->group_entry, list); | ||
363 | sibling->group_leader = sibling; | 398 | sibling->group_leader = sibling; |
364 | 399 | ||
365 | /* Inherit group flags from the previous leader */ | 400 | /* Inherit group flags from the previous leader */ |
@@ -727,6 +762,7 @@ static void add_event_to_ctx(struct perf_event *event, | |||
727 | struct perf_event_context *ctx) | 762 | struct perf_event_context *ctx) |
728 | { | 763 | { |
729 | list_add_event(event, ctx); | 764 | list_add_event(event, ctx); |
765 | perf_group_attach(event); | ||
730 | event->tstamp_enabled = ctx->time; | 766 | event->tstamp_enabled = ctx->time; |
731 | event->tstamp_running = ctx->time; | 767 | event->tstamp_running = ctx->time; |
732 | event->tstamp_stopped = ctx->time; | 768 | event->tstamp_stopped = ctx->time; |
@@ -1894,8 +1930,8 @@ int perf_event_release_kernel(struct perf_event *event) | |||
1894 | */ | 1930 | */ |
1895 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 1931 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); |
1896 | raw_spin_lock_irq(&ctx->lock); | 1932 | raw_spin_lock_irq(&ctx->lock); |
1933 | perf_group_detach(event); | ||
1897 | list_del_event(event, ctx); | 1934 | list_del_event(event, ctx); |
1898 | perf_destroy_group(event, ctx); | ||
1899 | raw_spin_unlock_irq(&ctx->lock); | 1935 | raw_spin_unlock_irq(&ctx->lock); |
1900 | mutex_unlock(&ctx->mutex); | 1936 | mutex_unlock(&ctx->mutex); |
1901 | 1937 | ||
@@ -5127,6 +5163,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5127 | list_add_tail(&event->owner_entry, &current->perf_event_list); | 5163 | list_add_tail(&event->owner_entry, &current->perf_event_list); |
5128 | mutex_unlock(&current->perf_event_mutex); | 5164 | mutex_unlock(&current->perf_event_mutex); |
5129 | 5165 | ||
5166 | /* | ||
5167 | * Drop the reference on the group_event after placing the | ||
5168 | * new event on the sibling_list. This ensures destruction | ||
5169 | * of the group leader will find the pointer to itself in | ||
5170 | * perf_group_detach(). | ||
5171 | */ | ||
5130 | fput_light(group_file, fput_needed); | 5172 | fput_light(group_file, fput_needed); |
5131 | fd_install(event_fd, event_file); | 5173 | fd_install(event_fd, event_file); |
5132 | return event_fd; | 5174 | return event_fd; |
@@ -5448,6 +5490,7 @@ static void perf_free_event(struct perf_event *event, | |||
5448 | 5490 | ||
5449 | fput(parent->filp); | 5491 | fput(parent->filp); |
5450 | 5492 | ||
5493 | perf_group_detach(event); | ||
5451 | list_del_event(event, ctx); | 5494 | list_del_event(event, ctx); |
5452 | free_event(event); | 5495 | free_event(event); |
5453 | } | 5496 | } |