Diffstat (limited to 'kernel')
-rw-r--r--  kernel/perf_counter.c | 109
-rw-r--r--  kernel/sched.c        |   2
2 files changed, 96 insertions, 15 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06ea3eae886e..c10055416dea 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -104,8 +104,11 @@ static void get_ctx(struct perf_counter_context *ctx)
 
 static void put_ctx(struct perf_counter_context *ctx)
 {
-	if (atomic_dec_and_test(&ctx->refcount))
+	if (atomic_dec_and_test(&ctx->refcount)) {
+		if (ctx->parent_ctx)
+			put_ctx(ctx->parent_ctx);
 		kfree(ctx);
+	}
 }
 
 static void
@@ -127,6 +130,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled++;
 }
 
 /*
@@ -141,6 +146,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
@@ -204,6 +211,22 @@ group_sched_out(struct perf_counter *group_counter,
 }
 
 /*
+ * Mark this context as not being a clone of another.
+ * Called when counters are added to or removed from this context.
+ * We also increment our generation number so that anything that
+ * was cloned from this context before this will not match anything
+ * cloned from this context after this.
+ */
+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+	++ctx->generation;
+	if (!ctx->parent_ctx)
+		return;
+	put_ctx(ctx->parent_ctx);
+	ctx->parent_ctx = NULL;
+}
+
+/*
 * Cross CPU call to remove a performance counter
 *
 * We disable the counter on the hardware level first. After that we
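
The comment on unclone_ctx() above carries the key invariant: bumping ctx->generation ensures that anything cloned before a modification can never be mistaken for something cloned after it. A minimal user-space sketch of that mechanism (the names struct ctx, clone_ctx, modify_ctx and ctx_matches are illustrative, not taken from this patch):

	/* Illustrative user-space model, not kernel code. */
	#include <stdio.h>
	#include <stdlib.h>

	struct ctx {
		unsigned long long generation;	/* bumped on every add/remove */
		struct ctx *parent;		/* context this was cloned from */
		unsigned long long parent_gen;	/* parent's generation at clone time */
	};

	static struct ctx *clone_ctx(struct ctx *parent)
	{
		struct ctx *c = calloc(1, sizeof(*c));

		c->parent = parent;
		c->parent_gen = parent->generation;	/* remember which version we copied */
		return c;
	}

	static void modify_ctx(struct ctx *c)
	{
		/* Any modification invalidates all earlier clones of this context. */
		++c->generation;
	}

	static int ctx_matches(struct ctx *a, struct ctx *b)
	{
		return a->parent && a->parent == b->parent &&
		       a->parent_gen == b->parent_gen;
	}

	int main(void)
	{
		struct ctx parent = { .generation = 1 };
		struct ctx *before, *after;

		before = clone_ctx(&parent);
		modify_ctx(&parent);		/* e.g. a counter was added */
		after = clone_ctx(&parent);

		/* Cloned on opposite sides of the change: must not match. */
		printf("before matches after: %d\n", ctx_matches(before, after));
		free(before);
		free(after);
		return 0;
	}

Running it prints 0: the two clones remember different generations of the same parent, so they never compare equal.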
@@ -263,6 +286,7 @@ static void perf_counter_remove_from_context(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
 
+	unclone_ctx(ctx);
 	if (!task) {
 		/*
 		 * Per cpu counters are removed via an smp call and
@@ -378,6 +402,7 @@ static void __perf_counter_disable(void *info)
 		else
 			counter_sched_out(counter, cpuctx, ctx);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -419,6 +444,7 @@ static void perf_counter_disable(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irq(&ctx->lock);
@@ -727,6 +753,7 @@ static void __perf_counter_enable(void *info)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	ctx->nr_enabled++;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -817,6 +844,7 @@ static void perf_counter_enable(struct perf_counter *counter)
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->tstamp_enabled =
 			ctx->time - counter->total_time_enabled;
+		ctx->nr_enabled++;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -862,6 +890,25 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 }
 
 /*
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled counters.
+ * If the number of enabled counters is the same, then the set
+ * of enabled counters should be the same, because these are both
+ * inherited contexts, therefore we can't access individual counters
+ * in them directly with an fd; we can only enable/disable all
+ * counters via prctl, or enable/disable all counters in a family
+ * via ioctl, which will have the same effect on both contexts.
+ */
+static int context_equiv(struct perf_counter_context *ctx1,
+			 struct perf_counter_context *ctx2)
+{
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& ctx1->nr_enabled == ctx2->nr_enabled;
+}
+
+/*
 * Called from scheduler to remove the counters of the current task,
 * with interrupts disabled.
 *
@@ -872,10 +919,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 * accessing the counter control register. If a NMI hits, then it will
 * not restart the counter.
 */
-void perf_counter_task_sched_out(struct task_struct *task, int cpu)
+void perf_counter_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter_context *next_ctx;
 	struct pt_regs *regs;
 
 	if (likely(!ctx || !cpuctx->task_ctx))
@@ -885,6 +934,16 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	next_ctx = next->perf_counter_ctxp;
+	if (next_ctx && context_equiv(ctx, next_ctx)) {
+		task->perf_counter_ctxp = next_ctx;
+		next->perf_counter_ctxp = ctx;
+		ctx->task = next;
+		next_ctx->task = task;
+		return;
+	}
+
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
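
The two hunks above are the heart of the patch: when the outgoing and incoming tasks carry equivalent inherited contexts, the contexts are simply exchanged between the tasks and the hardware counters keep running across the switch, instead of being unscheduled and rescheduled. A compressed user-space sketch of that fast path, using illustrative stand-ins for the kernel structures (struct task, struct context, sched_out and full_switch are not the kernel's names):

	#include <stdio.h>

	struct task;

	struct context {
		struct context *parent_ctx;	/* context this one was cloned from */
		unsigned long long parent_gen;	/* parent's generation at clone time */
		int nr_enabled;			/* counters currently enabled */
		struct task *task;		/* task this context belongs to */
	};

	struct task {
		const char *name;
		struct context *ctx;
	};

	/* Equivalent iff cloned from the same version of the same parent
	 * and carrying the same number of enabled counters. */
	static int context_equiv(struct context *a, struct context *b)
	{
		return a->parent_ctx && a->parent_ctx == b->parent_ctx &&
		       a->parent_gen == b->parent_gen &&
		       a->nr_enabled == b->nr_enabled;
	}

	/* Stand-in for the expensive path that reprograms the PMU. */
	static void full_switch(struct task *prev, struct task *next)
	{
		printf("unschedule %s's counters, schedule %s's counters\n",
		       prev->name, next->name);
	}

	static void sched_out(struct task *prev, struct task *next)
	{
		struct context *ctx = prev->ctx;
		struct context *next_ctx = next->ctx;

		if (ctx && next_ctx && context_equiv(ctx, next_ctx)) {
			/* Swap the software contexts and keep the hardware
			 * counters running across the context switch. */
			prev->ctx = next_ctx;
			next->ctx = ctx;
			ctx->task = next;
			next_ctx->task = prev;
			printf("equivalent contexts: swapped, no reprogramming\n");
			return;
		}
		full_switch(prev, next);
	}

	int main(void)
	{
		struct context original = { .nr_enabled = 2 };
		struct context c1 = { &original, 1, 2, NULL };
		struct context c2 = { &original, 1, 2, NULL };
		struct task t1 = { "prev", &c1 }, t2 = { "next", &c2 };

		c1.task = &t1;
		c2.task = &t2;
		sched_out(&t1, &t2);	/* takes the cheap swap path */
		return 0;
	}

The nr_enabled comparison matters because, as the context_equiv() comment explains, inherited counters cannot be touched individually through an fd; they can only be enabled or disabled wholesale via prctl or a family-wide ioctl, which affects both contexts identically.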
@@ -998,6 +1057,8 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 
 	if (likely(!ctx))
 		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -3253,6 +3314,16 @@ inherit_counter(struct perf_counter *parent_counter,
 		return child_counter;
 
 	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its hw_event.disabled bit. We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en,dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
+	/*
 	 * Link it up in the child's context:
 	 */
 	add_counter_to_ctx(child_counter, child_ctx);
@@ -3277,16 +3348,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	mutex_lock(&parent_counter->mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
 
-	/*
-	 * Make the child state follow the state of the parent counter,
-	 * not its hw_event.disabled bit. We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en,dis}able_family.
-	 */
-	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-	else
-		child_counter->state = PERF_COUNTER_STATE_OFF;
-
 	mutex_unlock(&parent_counter->mutex);
 
 	return child_counter;
@@ -3429,6 +3490,7 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx, *parent_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
+	int inherited_all = 1;
 
 	child->perf_counter_ctxp = NULL;
 
@@ -3463,12 +3525,31 @@ void perf_counter_init_task(struct task_struct *child)
 		if (counter != counter->group_leader)
 			continue;
 
-		if (!counter->hw_event.inherit)
+		if (!counter->hw_event.inherit) {
+			inherited_all = 0;
 			continue;
+		}
 
 		if (inherit_group(counter, parent,
-				  parent_ctx, child, child_ctx))
+				  parent_ctx, child, child_ctx)) {
+			inherited_all = 0;
 			break;
+		}
+	}
+
+	if (inherited_all) {
+		/*
+		 * Mark the child context as a clone of the parent
+		 * context, or of whatever the parent is a clone of.
+		 */
+		if (parent_ctx->parent_ctx) {
+			child_ctx->parent_ctx = parent_ctx->parent_ctx;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
+		} else {
+			child_ctx->parent_ctx = parent_ctx;
+			child_ctx->parent_gen = parent_ctx->generation;
+		}
+		get_ctx(child_ctx->parent_ctx);
 	}
 
 	mutex_unlock(&parent_ctx->mutex);
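
One more detail worth noting in the inheritance hunk above: when the parent's context is itself a clone, the child records the parent's parent (and its remembered generation), so clone chains stay one level deep and any two tasks ultimately cloned from the same original context can be compared directly by context_equiv(). A small user-space sketch of that flattening (names are illustrative; the refcount only mimics get_ctx/put_ctx):

	#include <assert.h>
	#include <stdlib.h>

	struct context {
		struct context *parent_ctx;
		unsigned long long generation;
		unsigned long long parent_gen;
		int refcount;
	};

	static void get_ctx(struct context *ctx)
	{
		ctx->refcount++;
	}

	static struct context *inherit_ctx(struct context *parent)
	{
		struct context *child = calloc(1, sizeof(*child));

		if (parent->parent_ctx) {
			/* Parent is itself a clone: point at the original. */
			child->parent_ctx = parent->parent_ctx;
			child->parent_gen = parent->parent_gen;
		} else {
			child->parent_ctx = parent;
			child->parent_gen = parent->generation;
		}
		get_ctx(child->parent_ctx);	/* hold a reference on the original */
		return child;
	}

	int main(void)
	{
		struct context top = { .generation = 5, .refcount = 1 };
		struct context *a = inherit_ctx(&top);	/* first-level clone */
		struct context *b = inherit_ctx(a);	/* clone of a clone */

		/* Both point at the same original, at the same generation. */
		assert(a->parent_ctx == &top && b->parent_ctx == &top);
		assert(a->parent_gen == b->parent_gen);

		free(a);
		free(b);
		return 0;
	}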
diff --git a/kernel/sched.c b/kernel/sched.c
index 419a39d0988f..4c0d58bce6b2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5091,7 +5091,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, cpu);
+		perf_counter_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;