Diffstat (limited to 'kernel')
-rw-r--r--	kernel/events/core.c	189
1 file changed, 105 insertions(+), 84 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2fabc0627165..9a5f339a0e2d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -153,7 +153,7 @@ enum event_type_t {
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -905,6 +905,15 @@ static void get_ctx(struct perf_event_context *ctx)
 	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 
+static void free_ctx(struct rcu_head *head)
+{
+	struct perf_event_context *ctx;
+
+	ctx = container_of(head, struct perf_event_context, rcu_head);
+	kfree(ctx->task_ctx_data);
+	kfree(ctx);
+}
+
 static void put_ctx(struct perf_event_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->refcount)) {
@@ -912,7 +921,7 @@ static void put_ctx(struct perf_event_context *ctx)
 			put_ctx(ctx->parent_ctx);
 		if (ctx->task)
 			put_task_struct(ctx->task);
-		kfree_rcu(ctx, rcu_head);
+		call_rcu(&ctx->rcu_head, free_ctx);
 	}
 }
 
@@ -1239,9 +1248,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (is_cgroup_event(event))
 		ctx->nr_cgroups++;
 
-	if (has_branch_stack(event))
-		ctx->nr_branch_stack++;
-
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
@@ -1408,9 +1414,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 		cpuctx->cgrp = NULL;
 	}
 
-	if (has_branch_stack(event))
-		ctx->nr_branch_stack--;
-
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat--;
@@ -1881,6 +1884,10 @@ event_sched_in(struct perf_event *event,
 
 	perf_pmu_disable(event->pmu);
 
+	event->tstamp_running += tstamp - event->tstamp_stopped;
+
+	perf_set_shadow_time(event, ctx, tstamp);
+
 	if (event->pmu->add(event, PERF_EF_START)) {
 		event->state = PERF_EVENT_STATE_INACTIVE;
 		event->oncpu = -1;
@@ -1888,10 +1895,6 @@ event_sched_in(struct perf_event *event,
 		goto out;
 	}
 
-	event->tstamp_running += tstamp - event->tstamp_stopped;
-
-	perf_set_shadow_time(event, ctx, tstamp);
-
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	if (!ctx->nr_active++)
@@ -2559,6 +2562,9 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 			next->perf_event_ctxp[ctxn] = ctx;
 			ctx->task = next;
 			next_ctx->task = task;
+
+			swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+
 			do_switch = 0;
 
 			perf_event_sync_stat(ctx, next_ctx);
@@ -2577,6 +2583,56 @@ unlock:
 	}
 }
 
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+	this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+	this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (prev == next)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (pmu->sched_task) {
+			cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)				\
 	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
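Not part of the diff itself: a minimal sketch of how a PMU driver might consume the new hooks, assuming the struct pmu ->sched_task member added elsewhere in this series; every example_* name below is hypothetical.

/* Illustrative sketch only -- not from this commit; example_* names are made up. */
#include <linux/perf_event.h>

/* Invoked through perf_pmu_sched_task() on every task switch once enabled. */
static void example_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	if (sched_in)
		return;
	/* e.g. flush or save per-task hardware state here; ctx may be NULL */
}

static int example_event_add(struct perf_event *event, int flags)
{
	/* ask the core to start invoking ->sched_task on this CPU */
	perf_sched_cb_inc(event->ctx->pmu);
	return 0;
}

static void example_event_del(struct perf_event *event, int flags)
{
	perf_sched_cb_dec(event->ctx->pmu);
}

static struct pmu example_pmu = {
	.add		= example_event_add,
	.del		= example_event_del,
	.sched_task	= example_sched_task,
	/* ->event_init, ->start, ->stop, ->read omitted for brevity */
};

perf_pmu_sched_task() runs only when the per-CPU perf_sched_cb_usages count is non-zero and only for PMUs with a non-NULL ->sched_task, so a driver pays nothing for the callback until it calls perf_sched_cb_inc().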
@@ -2596,6 +2652,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(task, next, false);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 
@@ -2755,64 +2814,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 }
 
 /*
- * When sampling the branck stack in system-wide, it may be necessary
- * to flush the stack on context switch. This happens when the branch
- * stack does not tag its entries with the pid of the current task.
- * Otherwise it becomes impossible to associate a branch entry with a
- * task. This ambiguity is more likely to appear when the branch stack
- * supports priv level filtering and the user sets it to monitor only
- * at the user level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch stack with
- * branch from multiple tasks. Flushing may mean dropping the existing
- * entries or stashing them somewhere in the PMU specific code layer.
- *
- * This function provides the context switch callback to the lower code
- * layer. It is invoked ONLY when there is at least one system-wide context
- * with at least one active event using taken branch sampling.
- */
-static void perf_branch_stack_sched_in(struct task_struct *prev,
-				       struct task_struct *task)
-{
-	struct perf_cpu_context *cpuctx;
-	struct pmu *pmu;
-	unsigned long flags;
-
-	/* no need to flush branch stack if not changing task */
-	if (prev == task)
-		return;
-
-	local_irq_save(flags);
-
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-		/*
-		 * check if the context has at least one
-		 * event using PERF_SAMPLE_BRANCH_STACK
-		 */
-		if (cpuctx->ctx.nr_branch_stack > 0
-		    && pmu->flush_branch_stack) {
-
-			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
-			perf_pmu_disable(pmu);
-
-			pmu->flush_branch_stack();
-
-			perf_pmu_enable(pmu);
-
-			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-		}
-	}
-
-	rcu_read_unlock();
-
-	local_irq_restore(flags);
-}
-
-/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
@@ -2844,9 +2845,8 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
 		perf_cgroup_sched_in(prev, task);
 
-	/* check for system-wide branch_stack events */
-	if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
-		perf_branch_stack_sched_in(prev, task);
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -3321,12 +3321,15 @@ errout:
  * Returns a matching context with refcount and pincount.
  */
 static struct perf_event_context *
-find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
+find_get_context(struct pmu *pmu, struct task_struct *task,
+		 struct perf_event *event)
 {
 	struct perf_event_context *ctx, *clone_ctx = NULL;
 	struct perf_cpu_context *cpuctx;
+	void *task_ctx_data = NULL;
 	unsigned long flags;
 	int ctxn, err;
+	int cpu = event->cpu;
 
 	if (!task) {
 		/* Must be root to operate on a CPU event: */
@@ -3354,11 +3357,24 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 	if (ctxn < 0)
 		goto errout;
 
+	if (event->attach_state & PERF_ATTACH_TASK_DATA) {
+		task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
+		if (!task_ctx_data) {
+			err = -ENOMEM;
+			goto errout;
+		}
+	}
+
 retry:
 	ctx = perf_lock_task_context(task, ctxn, &flags);
 	if (ctx) {
 		clone_ctx = unclone_ctx(ctx);
 		++ctx->pin_count;
+
+		if (task_ctx_data && !ctx->task_ctx_data) {
+			ctx->task_ctx_data = task_ctx_data;
+			task_ctx_data = NULL;
+		}
 		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
 		if (clone_ctx)
@@ -3369,6 +3385,11 @@ retry:
 		if (!ctx)
 			goto errout;
 
+		if (task_ctx_data) {
+			ctx->task_ctx_data = task_ctx_data;
+			task_ctx_data = NULL;
+		}
+
 		err = 0;
 		mutex_lock(&task->perf_event_mutex);
 		/*
@@ -3395,9 +3416,11 @@ retry:
 		}
 	}
 
+	kfree(task_ctx_data);
 	return ctx;
 
 errout:
+	kfree(task_ctx_data);
 	return ERR_PTR(err);
 }
 
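Also not part of the diff: the per-task data path above (PERF_ATTACH_TASK_DATA, ->task_ctx_size, ctx->task_ctx_data) is driven entirely by the PMU driver opting in. A sketch under the same caveats, with invented example_* names and assuming the driver flags the event during event_init:

/* Illustrative sketch only -- not from this commit. */
#include <linux/perf_event.h>

struct example_task_ctx {
	u64	saved_regs[4];	/* stand-in for per-task hardware state */
	int	users;
};

static int example_event_init(struct perf_event *event)
{
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * Request a per-task area: find_get_context() kzalloc()s
	 * ->task_ctx_size bytes into ctx->task_ctx_data, the sched_out
	 * path swap()s it between equivalent contexts, and free_ctx()
	 * kfree()s it when the context is released.
	 */
	event->attach_state |= PERF_ATTACH_TASK_DATA;
	return 0;
}

static struct pmu example_pmu = {
	.event_init	= example_event_init,
	.task_ctx_size	= sizeof(struct example_task_ctx),
	/* ->add/->del/->start/->stop/->read omitted */
};

The swap() added to perf_event_context_sched_out() is what lets that area follow the task cheaply when two contexts are equivalent, rather than being reallocated on every switch.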
@@ -3423,10 +3446,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 	if (event->parent)
 		return;
 
-	if (has_branch_stack(event)) {
-		if (!(event->attach_state & PERF_ATTACH_TASK))
-			atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
-	}
 	if (is_cgroup_event(event))
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -6133,6 +6152,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	}
 
 	hlist_add_head_rcu(&event->hlist_entry, head);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
@@ -6602,6 +6622,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags)
 {
 	if (flags & PERF_EF_START)
 		cpu_clock_event_start(event, flags);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
@@ -6676,6 +6697,7 @@ static int task_clock_event_add(struct perf_event *event, int flags)
 {
 	if (flags & PERF_EF_START)
 		task_clock_event_start(event, flags);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
@@ -7089,10 +7111,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 	if (event->parent)
 		return;
 
-	if (has_branch_stack(event)) {
-		if (!(event->attach_state & PERF_ATTACH_TASK))
-			atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
-	}
 	if (is_cgroup_event(event))
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -7224,6 +7242,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
 		goto err_ns;
 
+	if (!has_branch_stack(event))
+		event->attr.branch_sample_type = 0;
+
 	pmu = perf_init_event(event);
 	if (!pmu)
 		goto err_ns;
@@ -7586,7 +7607,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	/*
 	 * Get the target context (task or percpu):
 	 */
-	ctx = find_get_context(pmu, task, event->cpu);
+	ctx = find_get_context(pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_alloc;
@@ -7792,7 +7813,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
 	account_event(event);
 
-	ctx = find_get_context(event->pmu, task, cpu);
+	ctx = find_get_context(event->pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_free;