diff options
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 111 |
1 files changed, 91 insertions, 20 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827f4982..671f6c8c8a32 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> |
34 | #include <linux/hw_breakpoint.h> | ||
34 | 35 | ||
35 | #include <asm/irq_regs.h> | 36 | #include <asm/irq_regs.h> |
36 | 37 | ||
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event, | |||
674 | 675 | ||
675 | event->tstamp_running += ctx->time - event->tstamp_stopped; | 676 | event->tstamp_running += ctx->time - event->tstamp_stopped; |
676 | 677 | ||
678 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | ||
679 | |||
677 | if (!is_software_event(event)) | 680 | if (!is_software_event(event)) |
678 | cpuctx->active_oncpu++; | 681 | cpuctx->active_oncpu++; |
679 | ctx->nr_active++; | 682 | ctx->nr_active++; |
@@ -2232,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2232 | raw_spin_unlock_irq(&ctx->lock); | 2235 | raw_spin_unlock_irq(&ctx->lock); |
2233 | mutex_unlock(&ctx->mutex); | 2236 | mutex_unlock(&ctx->mutex); |
2234 | 2237 | ||
2235 | mutex_lock(&event->owner->perf_event_mutex); | ||
2236 | list_del_init(&event->owner_entry); | ||
2237 | mutex_unlock(&event->owner->perf_event_mutex); | ||
2238 | put_task_struct(event->owner); | ||
2239 | |||
2240 | free_event(event); | 2238 | free_event(event); |
2241 | 2239 | ||
2242 | return 0; | 2240 | return 0; |
@@ -2249,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2249 | static int perf_release(struct inode *inode, struct file *file) | 2247 | static int perf_release(struct inode *inode, struct file *file) |
2250 | { | 2248 | { |
2251 | struct perf_event *event = file->private_data; | 2249 | struct perf_event *event = file->private_data; |
2250 | struct task_struct *owner; | ||
2252 | 2251 | ||
2253 | file->private_data = NULL; | 2252 | file->private_data = NULL; |
2254 | 2253 | ||
2254 | rcu_read_lock(); | ||
2255 | owner = ACCESS_ONCE(event->owner); | ||
2256 | /* | ||
2257 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe | ||
2258 | * !owner it means the list deletion is complete and we can indeed | ||
2259 | * free this event, otherwise we need to serialize on | ||
2260 | * owner->perf_event_mutex. | ||
2261 | */ | ||
2262 | smp_read_barrier_depends(); | ||
2263 | if (owner) { | ||
2264 | /* | ||
2265 | * Since delayed_put_task_struct() also drops the last | ||
2266 | * task reference we can safely take a new reference | ||
2267 | * while holding the rcu_read_lock(). | ||
2268 | */ | ||
2269 | get_task_struct(owner); | ||
2270 | } | ||
2271 | rcu_read_unlock(); | ||
2272 | |||
2273 | if (owner) { | ||
2274 | mutex_lock(&owner->perf_event_mutex); | ||
2275 | /* | ||
2276 | * We have to re-check the event->owner field, if it is cleared | ||
2277 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
2278 | * ensured they're done, and we can proceed with freeing the | ||
2279 | * event. | ||
2280 | */ | ||
2281 | if (event->owner) | ||
2282 | list_del_init(&event->owner_entry); | ||
2283 | mutex_unlock(&owner->perf_event_mutex); | ||
2284 | put_task_struct(owner); | ||
2285 | } | ||
2286 | |||
2255 | return perf_event_release_kernel(event); | 2287 | return perf_event_release_kernel(event); |
2256 | } | 2288 | } |
2257 | 2289 | ||
@@ -3396,7 +3428,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | |||
3396 | } | 3428 | } |
3397 | 3429 | ||
3398 | static void perf_output_read_one(struct perf_output_handle *handle, | 3430 | static void perf_output_read_one(struct perf_output_handle *handle, |
3399 | struct perf_event *event) | 3431 | struct perf_event *event, |
3432 | u64 enabled, u64 running) | ||
3400 | { | 3433 | { |
3401 | u64 read_format = event->attr.read_format; | 3434 | u64 read_format = event->attr.read_format; |
3402 | u64 values[4]; | 3435 | u64 values[4]; |
@@ -3404,11 +3437,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3404 | 3437 | ||
3405 | values[n++] = perf_event_count(event); | 3438 | values[n++] = perf_event_count(event); |
3406 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 3439 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
3407 | values[n++] = event->total_time_enabled + | 3440 | values[n++] = enabled + |
3408 | atomic64_read(&event->child_total_time_enabled); | 3441 | atomic64_read(&event->child_total_time_enabled); |
3409 | } | 3442 | } |
3410 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 3443 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
3411 | values[n++] = event->total_time_running + | 3444 | values[n++] = running + |
3412 | atomic64_read(&event->child_total_time_running); | 3445 | atomic64_read(&event->child_total_time_running); |
3413 | } | 3446 | } |
3414 | if (read_format & PERF_FORMAT_ID) | 3447 | if (read_format & PERF_FORMAT_ID) |
@@ -3421,7 +3454,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3421 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. | 3454 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. |
3422 | */ | 3455 | */ |
3423 | static void perf_output_read_group(struct perf_output_handle *handle, | 3456 | static void perf_output_read_group(struct perf_output_handle *handle, |
3424 | struct perf_event *event) | 3457 | struct perf_event *event, |
3458 | u64 enabled, u64 running) | ||
3425 | { | 3459 | { |
3426 | struct perf_event *leader = event->group_leader, *sub; | 3460 | struct perf_event *leader = event->group_leader, *sub; |
3427 | u64 read_format = event->attr.read_format; | 3461 | u64 read_format = event->attr.read_format; |
@@ -3431,10 +3465,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3431 | values[n++] = 1 + leader->nr_siblings; | 3465 | values[n++] = 1 + leader->nr_siblings; |
3432 | 3466 | ||
3433 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 3467 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
3434 | values[n++] = leader->total_time_enabled; | 3468 | values[n++] = enabled; |
3435 | 3469 | ||
3436 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 3470 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
3437 | values[n++] = leader->total_time_running; | 3471 | values[n++] = running; |
3438 | 3472 | ||
3439 | if (leader != event) | 3473 | if (leader != event) |
3440 | leader->pmu->read(leader); | 3474 | leader->pmu->read(leader); |
@@ -3459,13 +3493,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3459 | } | 3493 | } |
3460 | } | 3494 | } |
3461 | 3495 | ||
3496 | #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ | ||
3497 | PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
3498 | |||
3462 | static void perf_output_read(struct perf_output_handle *handle, | 3499 | static void perf_output_read(struct perf_output_handle *handle, |
3463 | struct perf_event *event) | 3500 | struct perf_event *event) |
3464 | { | 3501 | { |
3502 | u64 enabled = 0, running = 0, now, ctx_time; | ||
3503 | u64 read_format = event->attr.read_format; | ||
3504 | |||
3505 | /* | ||
3506 | * compute total_time_enabled, total_time_running | ||
3507 | * based on snapshot values taken when the event | ||
3508 | * was last scheduled in. | ||
3509 | * | ||
3510 | * we cannot simply call update_context_time() | ||
3511 | * because of locking issues, as we are called in | ||
3512 | * NMI context | ||
3513 | */ | ||
3514 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | ||
3515 | now = perf_clock(); | ||
3516 | ctx_time = event->shadow_ctx_time + now; | ||
3517 | enabled = ctx_time - event->tstamp_enabled; | ||
3518 | running = ctx_time - event->tstamp_running; | ||
3519 | } | ||
3520 | |||
3465 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3521 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3466 | perf_output_read_group(handle, event); | 3522 | perf_output_read_group(handle, event, enabled, running); |
3467 | else | 3523 | else |
3468 | perf_output_read_one(handle, event); | 3524 | perf_output_read_one(handle, event, enabled, running); |
3469 | } | 3525 | } |
3470 | 3526 | ||
3471 | void perf_output_sample(struct perf_output_handle *handle, | 3527 | void perf_output_sample(struct perf_output_handle *handle, |
@@ -5651,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5651 | mutex_unlock(&ctx->mutex); | 5707 | mutex_unlock(&ctx->mutex); |
5652 | 5708 | ||
5653 | event->owner = current; | 5709 | event->owner = current; |
5654 | get_task_struct(current); | 5710 | |
5655 | mutex_lock(&current->perf_event_mutex); | 5711 | mutex_lock(&current->perf_event_mutex); |
5656 | list_add_tail(&event->owner_entry, &current->perf_event_list); | 5712 | list_add_tail(&event->owner_entry, &current->perf_event_list); |
5657 | mutex_unlock(&current->perf_event_mutex); | 5713 | mutex_unlock(&current->perf_event_mutex); |
@@ -5719,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5719 | ++ctx->generation; | 5775 | ++ctx->generation; |
5720 | mutex_unlock(&ctx->mutex); | 5776 | mutex_unlock(&ctx->mutex); |
5721 | 5777 | ||
5722 | event->owner = current; | ||
5723 | get_task_struct(current); | ||
5724 | mutex_lock(&current->perf_event_mutex); | ||
5725 | list_add_tail(&event->owner_entry, &current->perf_event_list); | ||
5726 | mutex_unlock(&current->perf_event_mutex); | ||
5727 | |||
5728 | return event; | 5778 | return event; |
5729 | 5779 | ||
5730 | err_free: | 5780 | err_free: |
@@ -5875,8 +5925,24 @@ again: | |||
5875 | */ | 5925 | */ |
5876 | void perf_event_exit_task(struct task_struct *child) | 5926 | void perf_event_exit_task(struct task_struct *child) |
5877 | { | 5927 | { |
5928 | struct perf_event *event, *tmp; | ||
5878 | int ctxn; | 5929 | int ctxn; |
5879 | 5930 | ||
5931 | mutex_lock(&child->perf_event_mutex); | ||
5932 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
5933 | owner_entry) { | ||
5934 | list_del_init(&event->owner_entry); | ||
5935 | |||
5936 | /* | ||
5937 | * Ensure the list deletion is visible before we clear | ||
5938 | * the owner, closes a race against perf_release() where | ||
5939 | * we need to serialize on the owner->perf_event_mutex. | ||
5940 | */ | ||
5941 | smp_wmb(); | ||
5942 | event->owner = NULL; | ||
5943 | } | ||
5944 | mutex_unlock(&child->perf_event_mutex); | ||
5945 | |||
5880 | for_each_task_context_nr(ctxn) | 5946 | for_each_task_context_nr(ctxn) |
5881 | perf_event_exit_task_context(child, ctxn); | 5947 | perf_event_exit_task_context(child, ctxn); |
5882 | } | 5948 | } |
@@ -6295,6 +6361,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
6295 | 6361 | ||
6296 | void __init perf_event_init(void) | 6362 | void __init perf_event_init(void) |
6297 | { | 6363 | { |
6364 | int ret; | ||
6365 | |||
6298 | perf_event_init_all_cpus(); | 6366 | perf_event_init_all_cpus(); |
6299 | init_srcu_struct(&pmus_srcu); | 6367 | init_srcu_struct(&pmus_srcu); |
6300 | perf_pmu_register(&perf_swevent); | 6368 | perf_pmu_register(&perf_swevent); |
@@ -6302,4 +6370,7 @@ void __init perf_event_init(void) | |||
6302 | perf_pmu_register(&perf_task_clock); | 6370 | perf_pmu_register(&perf_task_clock); |
6303 | perf_tp_register(); | 6371 | perf_tp_register(); |
6304 | perf_cpu_notifier(perf_cpu_notify); | 6372 | perf_cpu_notifier(perf_cpu_notify); |
6373 | |||
6374 | ret = init_hw_breakpoint(); | ||
6375 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | ||
6305 | } | 6376 | } |