diff options
author | Peter Zijlstra <peterz@infradead.org> | 2013-10-03 10:02:23 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-10-04 03:58:53 -0400 |
commit | 9886167d20c0720dcfb01e62cdff4d906b226f43 (patch) | |
tree | dff8fb937ae2c73fe474263da0e906aa77d6ae54 | |
parent | cac6653529eeac9b661fc8342c47d849c3adb085 (diff) |
perf: Fix perf_pmu_migrate_context
While auditing the list_entry usage due to a trinity bug I found that
perf_pmu_migrate_context violates the rules for
perf_event::event_entry.
The problem is that perf_event::event_entry is a RCU list element, and
hence we must wait for a full RCU grace period before re-using the
element after deletion.
Therefore the usage in perf_pmu_migrate_context() which re-uses the
entry immediately is broken. For now introduce another list_head into
perf_event for this specific usage.
This doesn't actually fix the trinity report because that never goes
through this code.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-mkj72lxagw1z8fvjm648iznw@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | include/linux/perf_event.h | 24 | ||||
-rw-r--r-- | kernel/events/core.c | 6 |
2 files changed, 26 insertions, 4 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 866e85c5eb94..c8ba627c1d60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -294,9 +294,31 @@ struct ring_buffer; | |||
294 | */ | 294 | */ |
295 | struct perf_event { | 295 | struct perf_event { |
296 | #ifdef CONFIG_PERF_EVENTS | 296 | #ifdef CONFIG_PERF_EVENTS |
297 | struct list_head group_entry; | 297 | /* |
298 | * entry onto perf_event_context::event_list; | ||
299 | * modifications require ctx->lock | ||
300 | * RCU safe iterations. | ||
301 | */ | ||
298 | struct list_head event_entry; | 302 | struct list_head event_entry; |
303 | |||
304 | /* | ||
305 | * XXX: group_entry and sibling_list should be mutually exclusive; | ||
306 | * either you're a sibling on a group, or you're the group leader. | ||
307 | * Rework the code to always use the same list element. | ||
308 | * | ||
309 | * Locked for modification by both ctx->mutex and ctx->lock; holding | ||
310 | * either sufficies for read. | ||
311 | */ | ||
312 | struct list_head group_entry; | ||
299 | struct list_head sibling_list; | 313 | struct list_head sibling_list; |
314 | |||
315 | /* | ||
316 | * We need storage to track the entries in perf_pmu_migrate_context; we | ||
317 | * cannot use the event_entry because of RCU and we want to keep the | ||
318 | * group in tact which avoids us using the other two entries. | ||
319 | */ | ||
320 | struct list_head migrate_entry; | ||
321 | |||
300 | struct hlist_node hlist_entry; | 322 | struct hlist_node hlist_entry; |
301 | int nr_siblings; | 323 | int nr_siblings; |
302 | int group_flags; | 324 | int group_flags; |
diff --git a/kernel/events/core.c b/kernel/events/core.c index cb4238e85b38..d49a9d29334c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -7234,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
7234 | perf_remove_from_context(event); | 7234 | perf_remove_from_context(event); |
7235 | unaccount_event_cpu(event, src_cpu); | 7235 | unaccount_event_cpu(event, src_cpu); |
7236 | put_ctx(src_ctx); | 7236 | put_ctx(src_ctx); |
7237 | list_add(&event->event_entry, &events); | 7237 | list_add(&event->migrate_entry, &events); |
7238 | } | 7238 | } |
7239 | mutex_unlock(&src_ctx->mutex); | 7239 | mutex_unlock(&src_ctx->mutex); |
7240 | 7240 | ||
7241 | synchronize_rcu(); | 7241 | synchronize_rcu(); |
7242 | 7242 | ||
7243 | mutex_lock(&dst_ctx->mutex); | 7243 | mutex_lock(&dst_ctx->mutex); |
7244 | list_for_each_entry_safe(event, tmp, &events, event_entry) { | 7244 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { |
7245 | list_del(&event->event_entry); | 7245 | list_del(&event->migrate_entry); |
7246 | if (event->state >= PERF_EVENT_STATE_OFF) | 7246 | if (event->state >= PERF_EVENT_STATE_OFF) |
7247 | event->state = PERF_EVENT_STATE_INACTIVE; | 7247 | event->state = PERF_EVENT_STATE_INACTIVE; |
7248 | account_event_cpu(event, dst_cpu); | 7248 | account_event_cpu(event, dst_cpu); |