Diffstat (limited to 'kernel')

 kernel/perf_counter.c | 109 ++++++++++++++++++++++++++++++++++++++++--------
 kernel/sched.c        |   2 +-
 2 files changed, 96 insertions(+), 15 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06ea3eae886e..c10055416dea 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -104,8 +104,11 @@ static void get_ctx(struct perf_counter_context *ctx)
 
 static void put_ctx(struct perf_counter_context *ctx)
 {
-	if (atomic_dec_and_test(&ctx->refcount))
+	if (atomic_dec_and_test(&ctx->refcount)) {
+		if (ctx->parent_ctx)
+			put_ctx(ctx->parent_ctx);
 		kfree(ctx);
+	}
 }
 
 static void
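Note on this hunk: a cloned context now holds a counted reference on its parent (taken via get_ctx() at the end of the patch), and put_ctx() releases that reference when the clone itself is freed, so a whole chain of cloned contexts unwinds once the last child goes away. A minimal userspace model of that recursive release; the names mirror the kernel structures but this is illustrative C, not kernel code:

#include <stdio.h>
#include <stdlib.h>

/* Userspace stand-in for struct perf_counter_context: just a refcount
 * and an optional parent pointer, enough to show the release chain. */
struct ctx {
        int refcount;
        struct ctx *parent;
};

static void put_ctx(struct ctx *c)
{
        if (--c->refcount == 0) {
                if (c->parent)
                        put_ctx(c->parent); /* drop the reference the clone held */
                printf("freeing ctx %p\n", (void *)c);
                free(c);
        }
}

int main(void)
{
        struct ctx *parent = malloc(sizeof(*parent));
        struct ctx *child  = malloc(sizeof(*child));

        parent->refcount = 2;   /* owner plus the child's clone reference */
        parent->parent   = NULL;
        child->refcount  = 1;
        child->parent    = parent;

        put_ctx(parent);        /* owner gone: parent survives, child still holds it */
        put_ctx(child);         /* child gone: both contexts are freed */
        return 0;
}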
@@ -127,6 +130,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled++;
 }
 
 /*
@@ -141,6 +146,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
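These two hunks, together with the enable/disable hunks further down (__perf_counter_disable, perf_counter_disable, __perf_counter_enable, perf_counter_enable), keep ctx->nr_enabled equal to the number of counters in the context whose state is PERF_COUNTER_STATE_INACTIVE or better. A small model of that bookkeeping invariant, in plain C with illustrative state values ordered the same way as the kernel enum:

#include <assert.h>

/* Anything >= STATE_INACTIVE counts as "enabled". */
enum state { STATE_OFF = -1, STATE_INACTIVE = 0, STATE_ACTIVE = 1 };

struct counter { enum state state; };
struct context { int nr_counters; int nr_enabled; };

static void add_counter(struct context *ctx, struct counter *c)
{
        ctx->nr_counters++;
        if (c->state >= STATE_INACTIVE)
                ctx->nr_enabled++;
}

static void del_counter(struct context *ctx, struct counter *c)
{
        ctx->nr_counters--;
        if (c->state >= STATE_INACTIVE)
                ctx->nr_enabled--;
}

int main(void)
{
        struct context ctx = { 0, 0 };
        struct counter on = { STATE_INACTIVE }, off = { STATE_OFF };

        add_counter(&ctx, &on);
        add_counter(&ctx, &off);
        assert(ctx.nr_counters == 2 && ctx.nr_enabled == 1);

        del_counter(&ctx, &off);
        assert(ctx.nr_counters == 1 && ctx.nr_enabled == 1);
        return 0;
}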
@@ -204,6 +211,22 @@ group_sched_out(struct perf_counter *group_counter,
 }
 
 /*
+ * Mark this context as not being a clone of another.
+ * Called when counters are added to or removed from this context.
+ * We also increment our generation number so that anything that
+ * was cloned from this context before this will not match anything
+ * cloned from this context after this.
+ */
+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+	++ctx->generation;
+	if (!ctx->parent_ctx)
+		return;
+	put_ctx(ctx->parent_ctx);
+	ctx->parent_ctx = NULL;
+}
+
+/*
  * Cross CPU call to remove a performance counter
  *
  * We disable the counter on the hardware level first. After that we
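In other words, any change to a context's counter set unclones it and bumps its generation, so clones taken before the change can no longer be mistaken for clones taken after it by the equivalence test added further down. A compact model of that generation bookkeeping; reference counting is left out and the values are arbitrary, so this is a sketch rather than kernel code:

#include <assert.h>
#include <stddef.h>

struct context {
        unsigned long generation;   /* bumped whenever the counter set changes */
        struct context *parent;     /* context this one was cloned from, if any */
        unsigned long parent_gen;   /* parent->generation at clone time */
};

static void unclone(struct context *ctx)
{
        ++ctx->generation;
        ctx->parent = NULL;         /* no longer a faithful copy of anything */
}

int main(void)
{
        struct context parent = { 0, NULL, 0 };

        struct context before = { 0, &parent, parent.generation };
        unclone(&parent);           /* parent's counter set changed */
        struct context after  = { 0, &parent, parent.generation };

        /* Clones taken before and after the change record different
         * generations, so they no longer compare as equivalent. */
        assert(before.parent_gen != after.parent_gen);
        return 0;
}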
@@ -263,6 +286,7 @@ static void perf_counter_remove_from_context(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
 
+	unclone_ctx(ctx);
 	if (!task) {
 		/*
 		 * Per cpu counters are removed via an smp call and
@@ -378,6 +402,7 @@ static void __perf_counter_disable(void *info)
 		else
 			counter_sched_out(counter, cpuctx, ctx);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -419,6 +444,7 @@ static void perf_counter_disable(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irq(&ctx->lock);
@@ -727,6 +753,7 @@ static void __perf_counter_enable(void *info)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	ctx->nr_enabled++;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -817,6 +844,7 @@ static void perf_counter_enable(struct perf_counter *counter)
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->tstamp_enabled =
 			ctx->time - counter->total_time_enabled;
+		ctx->nr_enabled++;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -862,6 +890,25 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 }
 
 /*
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled counters.
+ * If the number of enabled counters is the same, then the set
+ * of enabled counters should be the same, because these are both
+ * inherited contexts, therefore we can't access individual counters
+ * in them directly with an fd; we can only enable/disable all
+ * counters via prctl, or enable/disable all counters in a family
+ * via ioctl, which will have the same effect on both contexts.
+ */
+static int context_equiv(struct perf_counter_context *ctx1,
+			 struct perf_counter_context *ctx2)
+{
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& ctx1->nr_enabled == ctx2->nr_enabled;
+}
+
+/*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
  *
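To make the shape of the three-way test explicit, here is a condensed userspace model of the same predicate; the field names mirror the patch, but the surrounding scaffolding (and the sample generation value) is made up for illustration:

#include <assert.h>
#include <stddef.h>

struct context {
        struct context *parent_ctx;
        unsigned long parent_gen;
        int nr_enabled;
};

/* Same shape as context_equiv() above: equivalent only if both contexts are
 * clones of the same context, taken at the same generation, with the same
 * number of enabled counters. */
static int context_equiv(struct context *a, struct context *b)
{
        return a->parent_ctx && a->parent_ctx == b->parent_ctx
                && a->parent_gen == b->parent_gen
                && a->nr_enabled == b->nr_enabled;
}

int main(void)
{
        struct context parent = { NULL, 0, 2 };
        struct context a = { &parent, 7, 2 };
        struct context b = { &parent, 7, 2 };

        assert(context_equiv(&a, &b));

        b.nr_enabled = 1;               /* the counter sets have diverged */
        assert(!context_equiv(&a, &b));
        return 0;
}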
@@ -872,10 +919,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
  * accessing the counter control register. If a NMI hits, then it will
  * not restart the counter.
  */
-void perf_counter_task_sched_out(struct task_struct *task, int cpu)
+void perf_counter_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter_context *next_ctx;
 	struct pt_regs *regs;
 
 	if (likely(!ctx || !cpuctx->task_ctx))
@@ -885,6 +934,16 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	next_ctx = next->perf_counter_ctxp;
+	if (next_ctx && context_equiv(ctx, next_ctx)) {
+		task->perf_counter_ctxp = next_ctx;
+		next->perf_counter_ctxp = ctx;
+		ctx->task = next;
+		next_ctx->task = task;
+		return;
+	}
+
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
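This hunk is the core of the optimization: when the outgoing and incoming tasks carry equivalent inherited contexts, the scheduler hook simply swaps the two context pointers and their task back-pointers instead of unscheduling and rescheduling every counter on the PMU. A userspace sketch of just the pointer swap, with locking and the hardware side deliberately left out:

#include <assert.h>
#include <stddef.h>

struct task;

struct context {
        struct task *task;      /* back-pointer to the owning task */
        int id;                 /* stand-in for "which counters are loaded" */
};

struct task {
        struct context *ctx;
};

/* Model of the fast path in perf_counter_task_sched_out(): exchange the
 * interchangeable contexts instead of reprogramming the hardware. */
static void sched_out_fast_path(struct task *prev, struct task *next)
{
        struct context *ctx = prev->ctx;
        struct context *next_ctx = next->ctx;

        prev->ctx = next_ctx;
        next->ctx = ctx;
        ctx->task = next;
        next_ctx->task = prev;
}

int main(void)
{
        struct context c1 = { NULL, 1 }, c2 = { NULL, 2 };
        struct task t1 = { &c1 }, t2 = { &c2 };
        c1.task = &t1;
        c2.task = &t2;

        sched_out_fast_path(&t1, &t2);

        /* The context already loaded on the CPU (c1) now belongs to the
         * incoming task, so nothing needs to be reprogrammed. */
        assert(t2.ctx == &c1 && c1.task == &t2);
        assert(t1.ctx == &c2 && c2.task == &t1);
        return 0;
}

The early return added to perf_counter_task_sched_in() in the next hunk is the other half of this: after the swap, cpuctx->task_ctx already equals the incoming task's context, so there is nothing to schedule in.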
@@ -998,6 +1057,8 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 
 	if (likely(!ctx))
 		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -3253,6 +3314,16 @@ inherit_counter(struct perf_counter *parent_counter,
 		return child_counter;
 
 	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its hw_event.disabled bit. We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en,dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
+	/*
 	 * Link it up in the child's context:
 	 */
 	add_counter_to_ctx(child_counter, child_ctx);
@@ -3277,16 +3348,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	mutex_lock(&parent_counter->mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
 
-	/*
-	 * Make the child state follow the state of the parent counter,
-	 * not its hw_event.disabled bit. We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en,dis}able_family.
-	 */
-	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-	else
-		child_counter->state = PERF_COUNTER_STATE_OFF;
-
 	mutex_unlock(&parent_counter->mutex);
 
 	return child_counter;
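These two hunks move the "child state follows parent state" block from after the counter is linked into the child context to before add_counter_to_ctx(), presumably so that the nr_enabled accounting now done in list_add_counter() sees the counter's final state; otherwise a child of a disabled parent would be counted as enabled. A toy illustration of the ordering issue, reusing the userspace model from the earlier sketch (not kernel code):

#include <assert.h>

enum state { STATE_OFF = -1, STATE_INACTIVE = 0 };

struct counter { enum state state; };
struct context { int nr_enabled; };

static void add_counter(struct context *ctx, struct counter *c)
{
        if (c->state >= STATE_INACTIVE)
                ctx->nr_enabled++;
}

int main(void)
{
        struct context ctx = { 0 };
        struct counter child = { STATE_INACTIVE };      /* freshly allocated: not yet OFF */
        enum state parent_state = STATE_OFF;            /* parent counter is disabled */

        /* Correct order (what the patch does): fix up the child's state first... */
        child.state = parent_state >= STATE_INACTIVE ? STATE_INACTIVE : STATE_OFF;
        /* ...then add it, so the disabled child is not counted as enabled. */
        add_counter(&ctx, &child);

        assert(ctx.nr_enabled == 0);
        return 0;
}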
@@ -3429,6 +3490,7 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx, *parent_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
+	int inherited_all = 1;
 
 	child->perf_counter_ctxp = NULL;
 
@@ -3463,12 +3525,31 @@ void perf_counter_init_task(struct task_struct *child)
 		if (counter != counter->group_leader)
 			continue;
 
-		if (!counter->hw_event.inherit)
+		if (!counter->hw_event.inherit) {
+			inherited_all = 0;
 			continue;
+		}
 
 		if (inherit_group(counter, parent,
-				  parent_ctx, child, child_ctx))
+				  parent_ctx, child, child_ctx)) {
+			inherited_all = 0;
 			break;
+		}
+	}
+
+	if (inherited_all) {
+		/*
+		 * Mark the child context as a clone of the parent
+		 * context, or of whatever the parent is a clone of.
+		 */
+		if (parent_ctx->parent_ctx) {
+			child_ctx->parent_ctx = parent_ctx->parent_ctx;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
+		} else {
+			child_ctx->parent_ctx = parent_ctx;
+			child_ctx->parent_gen = parent_ctx->generation;
+		}
+		get_ctx(child_ctx->parent_ctx);
 	}
 
 	mutex_unlock(&parent_ctx->mutex);
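Only when every inheritable counter group is successfully cloned does the child context get marked as a clone, and it records either the parent context itself or, if the parent is itself a clone, the parent's own parent with the matching generation. That keeps the whole family of fully-inherited contexts comparable with one another through context_equiv(). A compact model of the propagation; reference counting is omitted and the generation values are made up, so treat it as a sketch:

#include <assert.h>
#include <stddef.h>

struct context {
        unsigned long generation;
        struct context *parent_ctx;
        unsigned long parent_gen;
};

/* Mirror of the "mark the child as a clone" logic: point at the ultimate
 * ancestor so grandchildren stay comparable with the ancestor's other
 * descendants. */
static void mark_clone(struct context *child, struct context *parent)
{
        if (parent->parent_ctx) {
                child->parent_ctx = parent->parent_ctx;
                child->parent_gen = parent->parent_gen;
        } else {
                child->parent_ctx = parent;
                child->parent_gen = parent->generation;
        }
}

int main(void)
{
        struct context top = { 5, NULL, 0 };
        struct context child = { 0, NULL, 0 };
        struct context grandchild = { 0, NULL, 0 };

        mark_clone(&child, &top);
        mark_clone(&grandchild, &child);

        /* Both generations of descendants record the same ancestor and the
         * same generation, so context_equiv() can match them directly. */
        assert(child.parent_ctx == &top && grandchild.parent_ctx == &top);
        assert(child.parent_gen == grandchild.parent_gen);
        return 0;
}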
diff --git a/kernel/sched.c b/kernel/sched.c
index 419a39d0988f..4c0d58bce6b2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5091,7 +5091,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, cpu);
+		perf_counter_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;