Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/core.c | 189
1 file changed, 105 insertions, 84 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2fabc0627165..9a5f339a0e2d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -153,7 +153,7 @@ enum event_type_t {
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -905,6 +905,15 @@ static void get_ctx(struct perf_event_context *ctx)
         WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 
+static void free_ctx(struct rcu_head *head)
+{
+        struct perf_event_context *ctx;
+
+        ctx = container_of(head, struct perf_event_context, rcu_head);
+        kfree(ctx->task_ctx_data);
+        kfree(ctx);
+}
+
 static void put_ctx(struct perf_event_context *ctx)
 {
         if (atomic_dec_and_test(&ctx->refcount)) {
@@ -912,7 +921,7 @@ static void put_ctx(struct perf_event_context *ctx)
                 put_ctx(ctx->parent_ctx);
                 if (ctx->task)
                         put_task_struct(ctx->task);
-                kfree_rcu(ctx, rcu_head);
+                call_rcu(&ctx->rcu_head, free_ctx);
         }
 }
 
@@ -1239,9 +1248,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
         if (is_cgroup_event(event))
                 ctx->nr_cgroups++;
 
-        if (has_branch_stack(event))
-                ctx->nr_branch_stack++;
-
         list_add_rcu(&event->event_entry, &ctx->event_list);
         ctx->nr_events++;
         if (event->attr.inherit_stat)
@@ -1408,9 +1414,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
                         cpuctx->cgrp = NULL;
         }
 
-        if (has_branch_stack(event))
-                ctx->nr_branch_stack--;
-
         ctx->nr_events--;
         if (event->attr.inherit_stat)
                 ctx->nr_stat--;
@@ -1881,6 +1884,10 @@ event_sched_in(struct perf_event *event,
 
         perf_pmu_disable(event->pmu);
 
+        event->tstamp_running += tstamp - event->tstamp_stopped;
+
+        perf_set_shadow_time(event, ctx, tstamp);
+
         if (event->pmu->add(event, PERF_EF_START)) {
                 event->state = PERF_EVENT_STATE_INACTIVE;
                 event->oncpu = -1;
@@ -1888,10 +1895,6 @@ event_sched_in(struct perf_event *event,
                 goto out;
         }
 
-        event->tstamp_running += tstamp - event->tstamp_stopped;
-
-        perf_set_shadow_time(event, ctx, tstamp);
-
         if (!is_software_event(event))
                 cpuctx->active_oncpu++;
         if (!ctx->nr_active++)
@@ -2559,6 +2562,9 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
                         next->perf_event_ctxp[ctxn] = ctx;
                         ctx->task = next;
                         next_ctx->task = task;
+
+                        swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+
                         do_switch = 0;
 
                         perf_event_sync_stat(ctx, next_ctx);
@@ -2577,6 +2583,56 @@ unlock:
         }
 }
 
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+        this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+        this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+                                struct task_struct *next,
+                                bool sched_in)
+{
+        struct perf_cpu_context *cpuctx;
+        struct pmu *pmu;
+        unsigned long flags;
+
+        if (prev == next)
+                return;
+
+        local_irq_save(flags);
+
+        rcu_read_lock();
+
+        list_for_each_entry_rcu(pmu, &pmus, entry) {
+                if (pmu->sched_task) {
+                        cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+                        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+                        perf_pmu_disable(pmu);
+
+                        pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+                        perf_pmu_enable(pmu);
+
+                        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+                }
+        }
+
+        rcu_read_unlock();
+
+        local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)                                \
         for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2596,6 +2652,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
         int ctxn;
 
+        if (__this_cpu_read(perf_sched_cb_usages))
+                perf_pmu_sched_task(task, next, false);
+
         for_each_task_context_nr(ctxn)
                 perf_event_context_sched_out(task, ctxn, next);
 
@@ -2755,64 +2814,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 }
 
 /*
- * When sampling the branck stack in system-wide, it may be necessary
- * to flush the stack on context switch. This happens when the branch
- * stack does not tag its entries with the pid of the current task.
- * Otherwise it becomes impossible to associate a branch entry with a
- * task. This ambiguity is more likely to appear when the branch stack
- * supports priv level filtering and the user sets it to monitor only
- * at the user level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch stack with
- * branch from multiple tasks. Flushing may mean dropping the existing
- * entries or stashing them somewhere in the PMU specific code layer.
- *
- * This function provides the context switch callback to the lower code
- * layer. It is invoked ONLY when there is at least one system-wide context
- * with at least one active event using taken branch sampling.
- */
-static void perf_branch_stack_sched_in(struct task_struct *prev,
-                                       struct task_struct *task)
-{
-        struct perf_cpu_context *cpuctx;
-        struct pmu *pmu;
-        unsigned long flags;
-
-        /* no need to flush branch stack if not changing task */
-        if (prev == task)
-                return;
-
-        local_irq_save(flags);
-
-        rcu_read_lock();
-
-        list_for_each_entry_rcu(pmu, &pmus, entry) {
-                cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-                /*
-                 * check if the context has at least one
-                 * event using PERF_SAMPLE_BRANCH_STACK
-                 */
-                if (cpuctx->ctx.nr_branch_stack > 0
-                    && pmu->flush_branch_stack) {
-
-                        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
-                        perf_pmu_disable(pmu);
-
-                        pmu->flush_branch_stack();
-
-                        perf_pmu_enable(pmu);
-
-                        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-                }
-        }
-
-        rcu_read_unlock();
-
-        local_irq_restore(flags);
-}
-
-/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
@@ -2844,9 +2845,8 @@ void __perf_event_task_sched_in(struct task_struct *prev,
         if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
                 perf_cgroup_sched_in(prev, task);
 
-        /* check for system-wide branch_stack events */
-        if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
-                perf_branch_stack_sched_in(prev, task);
+        if (__this_cpu_read(perf_sched_cb_usages))
+                perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -3321,12 +3321,15 @@ errout:
  * Returns a matching context with refcount and pincount.
  */
 static struct perf_event_context *
-find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
+find_get_context(struct pmu *pmu, struct task_struct *task,
+                 struct perf_event *event)
 {
         struct perf_event_context *ctx, *clone_ctx = NULL;
         struct perf_cpu_context *cpuctx;
+        void *task_ctx_data = NULL;
         unsigned long flags;
         int ctxn, err;
+        int cpu = event->cpu;
 
         if (!task) {
                 /* Must be root to operate on a CPU event: */
@@ -3354,11 +3357,24 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
         if (ctxn < 0)
                 goto errout;
 
+        if (event->attach_state & PERF_ATTACH_TASK_DATA) {
+                task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
+                if (!task_ctx_data) {
+                        err = -ENOMEM;
+                        goto errout;
+                }
+        }
+
 retry:
         ctx = perf_lock_task_context(task, ctxn, &flags);
         if (ctx) {
                 clone_ctx = unclone_ctx(ctx);
                 ++ctx->pin_count;
+
+                if (task_ctx_data && !ctx->task_ctx_data) {
+                        ctx->task_ctx_data = task_ctx_data;
+                        task_ctx_data = NULL;
+                }
                 raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
                 if (clone_ctx)
@@ -3369,6 +3385,11 @@ retry:
                 if (!ctx)
                         goto errout;
 
+                if (task_ctx_data) {
+                        ctx->task_ctx_data = task_ctx_data;
+                        task_ctx_data = NULL;
+                }
+
                 err = 0;
                 mutex_lock(&task->perf_event_mutex);
                 /*
@@ -3395,9 +3416,11 @@ retry:
                 }
         }
 
+        kfree(task_ctx_data);
         return ctx;
 
 errout:
+        kfree(task_ctx_data);
         return ERR_PTR(err);
 }
 
@@ -3423,10 +3446,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
         if (event->parent)
                 return;
 
-        if (has_branch_stack(event)) {
-                if (!(event->attach_state & PERF_ATTACH_TASK))
-                        atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
-        }
         if (is_cgroup_event(event))
                 atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -6133,6 +6152,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
         }
 
         hlist_add_head_rcu(&event->hlist_entry, head);
+        perf_event_update_userpage(event);
 
         return 0;
 }
@@ -6602,6 +6622,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags)
 {
         if (flags & PERF_EF_START)
                 cpu_clock_event_start(event, flags);
+        perf_event_update_userpage(event);
 
         return 0;
 }
@@ -6676,6 +6697,7 @@ static int task_clock_event_add(struct perf_event *event, int flags)
 {
         if (flags & PERF_EF_START)
                 task_clock_event_start(event, flags);
+        perf_event_update_userpage(event);
 
         return 0;
 }
@@ -7089,10 +7111,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
         if (event->parent)
                 return;
 
-        if (has_branch_stack(event)) {
-                if (!(event->attach_state & PERF_ATTACH_TASK))
-                        atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
-        }
         if (is_cgroup_event(event))
                 atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -7224,6 +7242,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
         if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
                 goto err_ns;
 
+        if (!has_branch_stack(event))
+                event->attr.branch_sample_type = 0;
+
         pmu = perf_init_event(event);
         if (!pmu)
                 goto err_ns;
@@ -7586,7 +7607,7 @@ SYSCALL_DEFINE5(perf_event_open,
         /*
          * Get the target context (task or percpu):
          */
-        ctx = find_get_context(pmu, task, event->cpu);
+        ctx = find_get_context(pmu, task, event);
         if (IS_ERR(ctx)) {
                 err = PTR_ERR(ctx);
                 goto err_alloc;
@@ -7792,7 +7813,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
         account_event(event);
 
-        ctx = find_get_context(event->pmu, task, cpu);
+        ctx = find_get_context(event->pmu, task, event);
         if (IS_ERR(ctx)) {
                 err = PTR_ERR(ctx);
                 goto err_free;
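
For reference, the following is a minimal sketch (not part of this patch) of how a PMU driver might consume the interfaces introduced above: perf_sched_cb_inc()/perf_sched_cb_dec() arm the per-CPU perf_sched_cb_usages counter so perf_pmu_sched_task() invokes the driver's ->sched_task() callback on context switch, and PERF_ATTACH_TASK_DATA asks find_get_context() to allocate pmu->task_ctx_size bytes of ctx->task_ctx_data (swapped between contexts on the optimized switch path and freed via free_ctx()). All my_* identifiers are hypothetical, only the struct pmu members relevant here are shown, and the sched_task/task_ctx_size members are referenced by this diff but defined in the matching include/linux/perf_event.h change.

#include <linux/perf_event.h>
#include <linux/slab.h>

/* Hypothetical per-task state carried in ctx->task_ctx_data. */
struct my_task_ctx {
        u64 saved_state;
};

/* Reached from perf_pmu_sched_task() on every context switch while
 * perf_sched_cb_usages is non-zero on this CPU; @ctx may be NULL. */
static void my_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
        struct my_task_ctx *tc = ctx ? ctx->task_ctx_data : NULL;

        if (!sched_in) {
                /* e.g. flush hardware state not tagged with the outgoing pid */
                return;
        }
        if (tc) {
                /* e.g. reprogram hardware from tc->saved_state */
        }
}

static int my_pmu_event_init(struct perf_event *event)
{
        /* Ask the core to allocate ->task_ctx_size bytes of task_ctx_data. */
        event->attach_state |= PERF_ATTACH_TASK_DATA;
        return 0;
}

static int my_pmu_add(struct perf_event *event, int flags)
{
        /* Enable the context-switch callback on this CPU while active. */
        perf_sched_cb_inc(event->ctx->pmu);
        return 0;
}

static void my_pmu_del(struct perf_event *event, int flags)
{
        perf_sched_cb_dec(event->ctx->pmu);
}

static struct pmu my_pmu = {
        .task_ctx_size  = sizeof(struct my_task_ctx),
        .sched_task     = my_pmu_sched_task,
        .event_init     = my_pmu_event_init,
        .add            = my_pmu_add,
        .del            = my_pmu_del,
};

Because the callback is gated by the per-CPU counter, it fires only while at least one such event is scheduled on the CPU, generalizing the perf_branch_stack_events/flush_branch_stack path that this patch removes.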