author     Paul Mackerras <paulus@samba.org>  2009-05-22 00:17:31 -0400
committer  Ingo Molnar <mingo@elte.hu>        2009-05-22 06:18:19 -0400
commit     a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4
tree       9e81e5e0299bd524b3d07c17a05760e33c7d58a0
parent     34adc8062227f41b04ade0ff3fbd1dbe3002669e
perf_counter: Dynamically allocate tasks' perf_counter_context struct
This replaces the struct perf_counter_context in the task_struct with
a pointer to a dynamically allocated perf_counter_context struct. The
main reason for doing this is to allow us to transfer a
perf_counter_context from one task to another when we do lazy PMU
switching in a later patch.
This has a few side-benefits: the task_struct becomes a little smaller,
we save some memory because only tasks that have perf_counters attached
get a perf_counter_context allocated for them, and we can remove the
inclusion of <linux/perf_counter.h> in sched.h, meaning that we don't
end up recompiling nearly everything whenever perf_counter.h changes.
The perf_counter_context structures are reference-counted and freed
when the last reference is dropped. A context can have references
from its task and the counters on its task. Counters can outlive the
task so it is possible that a context will be freed well after its
task has exited.
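
In outline, the lifetime rule comes down to the get_ctx()/put_ctx() pair added in
kernel/perf_counter.c (condensed here for reference; the hunk below is authoritative):

	static void get_ctx(struct perf_counter_context *ctx)
	{
		atomic_inc(&ctx->refcount);
	}

	static void put_ctx(struct perf_counter_context *ctx)
	{
		if (atomic_dec_and_test(&ctx->refcount))
			kfree(ctx);
	}

Each counter takes a reference when it is allocated (get_ctx() in perf_counter_alloc)
and drops it in free_counter_rcu(), so the context stays around until both the task
and every counter attached to it are gone.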
Contexts are allocated on fork if the parent had a context, or
otherwise the first time that a per-task counter is created on a task.
In the latter case, we set the context pointer in the task struct
locklessly using an atomic compare-and-exchange operation in case we
raced with some other task in creating a context for the subject task.
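
The allocation path in find_get_context() follows the usual lockless-install pattern;
roughly (condensed from the kernel/perf_counter.c hunk below):

	ctx = task->perf_counter_ctxp;
	if (!ctx) {
		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
		if (!ctx) {
			put_task_struct(task);
			return ERR_PTR(-ENOMEM);
		}
		__perf_counter_init_context(ctx, task);
		/* publish the initialized contents before the pointer */
		smp_wmb();
		tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
		if (tctx) {
			/* lost the race; use the context the winner installed */
			kfree(ctx);
			ctx = tctx;
		}
	}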
This also removes the task pointer from the perf_counter struct. The
task pointer was not used anywhere and would make it harder to move a
context from one task to another. Anything that needed to know which
task a counter was attached to was already using counter->ctx->task.
The __perf_counter_init_context function moves up in perf_counter.c
so that it can be called from find_get_context, and now initializes
the refcount, but is otherwise unchanged.
We were potentially calling list_del_counter twice: once from
__perf_counter_exit_task when the task exits and once from
__perf_counter_remove_from_context when the counter's fd gets closed.
This adds a check in list_del_counter so it doesn't do anything if
the counter has already been removed from the lists.
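
Concretely, list_del_counter() now just bails out on a second call (see the hunk below);
since list_del_init() re-initializes the entry, the list_empty() test on the entry itself
is a safe "already removed" check:

	if (list_empty(&counter->list_entry))
		return;
	ctx->nr_counters--;
	list_del_init(&counter->list_entry);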
Since perf_counter_task_sched_in doesn't do anything if the task doesn't
have a context, and leaves cpuctx->task_ctx = NULL, this adds code to
__perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in
the case where the current task adds the first counter to itself and
thus creates a context for itself.
This also adds similar code to __perf_counter_enable to handle a
similar situation which can arise when the counters have been disabled
using prctl; that also leaves cpuctx->task_ctx = NULL.
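
Both functions grow the same guard (shown once here; see the corresponding hunks below):

	if (ctx->task && cpuctx->task_ctx != ctx) {
		if (cpuctx->task_ctx || ctx->task != current)
			return;
		cpuctx->task_ctx = ctx;	/* first counter on the current task */
	}

	spin_lock_irqsave(&ctx->lock, flags);
	ctx->is_active = 1;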
[ Impact: refactor counter context management to prepare for new feature ]
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  arch/x86/kernel/apic/apic.c   |   1
-rw-r--r--  include/linux/init_task.h     |  13
-rw-r--r--  include/linux/perf_counter.h  |   4
-rw-r--r--  include/linux/sched.h         |   6
-rw-r--r--  kernel/exit.c                 |   3
-rw-r--r--  kernel/fork.c                 |   1
-rw-r--r--  kernel/perf_counter.c         | 218
7 files changed, 145 insertions, 101 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e9021a908020..b4f64402a82a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
  *	Mikael Pettersson	:	PM converted to driver model.
  */
 
+#include <linux/perf_counter.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 503afaa0afa7..d87247d2641f 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,18 +108,6 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
-#ifdef CONFIG_PERF_COUNTERS
-# define INIT_PERF_COUNTERS(tsk) \
-	.perf_counter_ctx.counter_list = \
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \
-	.perf_counter_ctx.event_list = \
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \
-	.perf_counter_ctx.lock = \
-		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
-#else
-# define INIT_PERF_COUNTERS(tsk)
-#endif
-
 /*
  * INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -183,7 +171,6 @@ extern struct cred init_cred;
 	}, \
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties), \
 	INIT_IDS \
-	INIT_PERF_COUNTERS(tsk) \
 	INIT_TRACE_IRQFLAGS \
 	INIT_LOCKDEP \
 	INIT_FTRACE_GRAPH \
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f612941ef46e..071309005468 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -449,7 +449,6 @@ struct perf_counter {
 	struct hw_perf_counter	hw;
 
 	struct perf_counter_context	*ctx;
-	struct task_struct	*task;
 	struct file	*filp;
 
 	struct perf_counter	*parent;
@@ -498,7 +497,6 @@ struct perf_counter {
 * Used as a container for task counters and CPU counters as well:
 */
 struct perf_counter_context {
-#ifdef CONFIG_PERF_COUNTERS
 	/*
 	 * Protect the states of the counters in the list,
 	 * nr_active, and the list:
@@ -516,6 +514,7 @@ struct perf_counter_context {
 	int	nr_counters;
 	int	nr_active;
 	int	is_active;
+	atomic_t	refcount;
 	struct task_struct	*task;
 
 	/*
@@ -523,7 +522,6 @@ struct perf_counter_context {
 	 */
 	u64	time;
 	u64	timestamp;
-#endif
 };
 
 /**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff59d1231519..9714d450f417 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,7 +71,6 @@ struct sched_param {
 #include <linux/path.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
-#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -99,6 +98,7 @@ struct robust_list_head;
 struct bio;
 struct bts_tracer;
 struct fs_struct;
+struct perf_counter_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1387,7 +1387,9 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
-	struct perf_counter_context perf_counter_ctx;
+#ifdef CONFIG_PERF_COUNTERS
+	struct perf_counter_context *perf_counter_ctxp;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
diff --git a/kernel/exit.c b/kernel/exit.c
index f9dfedd94af0..99ad4063ee4a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,6 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
+#include <linux/perf_counter.h>
 #include <trace/sched.h>
 
 #include <asm/uaccess.h>
@@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
 #ifdef CONFIG_PERF_COUNTERS
-	WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+	WARN_ON_ONCE(tsk->perf_counter_ctxp);
 #endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index d32fef4d38e5..e72a09f5355b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -63,6 +63,7 @@
 #include <linux/fs_struct.h>
 #include <trace/sched.h>
 #include <linux/magic.h>
+#include <linux/perf_counter.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 08584c16049f..06ea3eae886e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -97,6 +97,17 @@ void perf_enable(void)
 		hw_perf_enable();
 }
 
+static void get_ctx(struct perf_counter_context *ctx)
+{
+	atomic_inc(&ctx->refcount);
+}
+
+static void put_ctx(struct perf_counter_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->refcount))
+		kfree(ctx);
+}
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -118,11 +129,17 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	ctx->nr_counters++;
 }
 
+/*
+ * Remove a counter from the lists for its context.
+ * Must be called with counter->mutex and ctx->mutex held.
+ */
 static void
 list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
 	struct perf_counter *sibling, *tmp;
 
+	if (list_empty(&counter->list_entry))
+		return;
 	ctx->nr_counters--;
 
 	list_del_init(&counter->list_entry);
@@ -216,8 +233,6 @@ static void __perf_counter_remove_from_context(void *info)
 
 	counter_sched_out(counter, cpuctx, ctx);
 
-	counter->task = NULL;
-
 	list_del_counter(counter, ctx);
 
 	if (!ctx->task) {
@@ -279,7 +294,6 @@ retry:
 	 */
 	if (!list_empty(&counter->list_entry)) {
 		list_del_counter(counter, ctx);
-		counter->task = NULL;
 	}
 	spin_unlock_irq(&ctx->lock);
 }
@@ -568,11 +582,17 @@ static void __perf_install_in_context(void *info)
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
 	 * scheduled out before the smp call arrived.
+	 * Or possibly this is the right context but it isn't
+	 * on this cpu because it had no counters.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	/*
@@ -653,7 +673,6 @@ perf_install_in_context(struct perf_counter_context *ctx,
 		return;
 	}
 
-	counter->task = task;
 retry:
 	task_oncpu_function_call(task, __perf_install_in_context,
 				 counter);
@@ -693,10 +712,14 @@ static void __perf_counter_enable(void *info)
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
@@ -852,10 +875,10 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 	struct pt_regs *regs;
 
-	if (likely(!cpuctx->task_ctx))
+	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
 
 	update_context_time(ctx);
@@ -871,6 +894,8 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
+	if (!cpuctx->task_ctx)
+		return;
 	__perf_counter_sched_out(ctx, cpuctx);
 	cpuctx->task_ctx = NULL;
 }
@@ -969,8 +994,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 
+	if (likely(!ctx))
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -985,11 +1012,11 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
 	unsigned long flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx || !ctx->nr_counters)
 		return 0;
 
 	local_irq_save(flags);
@@ -1020,12 +1047,12 @@ int perf_counter_task_disable(void)
 int perf_counter_task_enable(void)
 {
 	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
 	unsigned long flags;
 	int cpu;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx || !ctx->nr_counters)
 		return 0;
 
 	local_irq_save(flags);
@@ -1128,19 +1155,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 		return;
 
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
-	ctx = &curr->perf_counter_ctx;
+	ctx = curr->perf_counter_ctxp;
 
 	perf_adjust_freq(&cpuctx->ctx);
-	perf_adjust_freq(ctx);
+	if (ctx)
+		perf_adjust_freq(ctx);
 
 	perf_counter_cpu_sched_out(cpuctx);
-	__perf_counter_task_sched_out(ctx);
+	if (ctx)
+		__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	rotate_ctx(ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
-	perf_counter_task_sched_in(curr, cpu);
+	if (ctx)
+		perf_counter_task_sched_in(curr, cpu);
 }
 
 /*
@@ -1176,6 +1207,22 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	return atomic64_read(&counter->count);
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->event_list);
+	atomic_set(&ctx->refcount, 1);
+	ctx->task = task;
+}
+
 static void put_context(struct perf_counter_context *ctx)
 {
 	if (ctx->task)
@@ -1186,6 +1233,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
+	struct perf_counter_context *tctx;
 	struct task_struct *task;
 
 	/*
@@ -1225,15 +1273,36 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	ctx = &task->perf_counter_ctx;
-	ctx->task = task;
-
 	/* Reuse ptrace permission checks for now. */
 	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
-		put_context(ctx);
+		put_task_struct(task);
 		return ERR_PTR(-EACCES);
 	}
 
+	ctx = task->perf_counter_ctxp;
+	if (!ctx) {
+		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+		if (!ctx) {
+			put_task_struct(task);
+			return ERR_PTR(-ENOMEM);
+		}
+		__perf_counter_init_context(ctx, task);
+		/*
+		 * Make sure other cpus see correct values for *ctx
+		 * once task->perf_counter_ctxp is visible to them.
+		 */
+		smp_wmb();
+		tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
+		if (tctx) {
+			/*
+			 * We raced with some other task; use
+			 * the context they set.
+			 */
+			kfree(ctx);
+			ctx = tctx;
+		}
+	}
+
 	return ctx;
 }
 
@@ -1242,6 +1311,7 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
+	put_ctx(counter->ctx);
 	kfree(counter);
 }
 
@@ -2247,7 +2317,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_comm_ctx(&current->perf_counter_ctx, comm_event);
+	perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event);
 }
 
 void perf_counter_comm(struct task_struct *task)
@@ -2256,7 +2326,9 @@ void perf_counter_comm(struct task_struct *task)
 
 	if (!atomic_read(&nr_comm_tracking))
 		return;
-
+	if (!current->perf_counter_ctxp)
+		return;
+
 	comm_event = (struct perf_comm_event){
 		.task = task,
 		.event = {
@@ -2372,7 +2444,7 @@ got_name:
 	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_mmap_ctx(&current->perf_counter_ctx, mmap_event);
+	perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event);
 
 	kfree(buf);
 }
@@ -2384,6 +2456,8 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 
 	if (!atomic_read(&nr_mmap_tracking))
 		return;
+	if (!current->perf_counter_ctxp)
+		return;
 
 	mmap_event = (struct perf_mmap_event){
 		.file = file,
@@ -2985,6 +3059,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader = group_leader;
 	counter->pmu = NULL;
 	counter->ctx = ctx;
+	get_ctx(ctx);
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
@@ -3150,21 +3225,6 @@ err_put_context:
 }
 
 /*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	memset(ctx, 0, sizeof(*ctx));
-	spin_lock_init(&ctx->lock);
-	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	INIT_LIST_HEAD(&ctx->event_list);
-	ctx->task = task;
-}
-
-/*
  * inherit a counter from parent task to child task:
  */
 static struct perf_counter *
@@ -3195,7 +3255,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Link it up in the child's context:
 	 */
-	child_counter->task = child;
 	add_counter_to_ctx(child_counter, child_ctx);
 
 	child_counter->parent = parent_counter;
@@ -3294,40 +3353,15 @@ __perf_counter_exit_task(struct task_struct *child,
 	struct perf_counter *parent_counter;
 
 	/*
-	 * If we do not self-reap then we have to wait for the
-	 * child task to unschedule (it will happen for sure),
-	 * so that its counter is at its final count. (This
-	 * condition triggers rarely - child tasks usually get
-	 * off their CPU before the parent has a chance to
-	 * get this far into the reaping action)
+	 * Protect against concurrent operations on child_counter
+	 * due its fd getting closed, etc.
 	 */
-	if (child != current) {
-		wait_task_inactive(child, 0);
-		update_counter_times(child_counter);
-		list_del_counter(child_counter, child_ctx);
-	} else {
-		struct perf_cpu_context *cpuctx;
-		unsigned long flags;
-
-		/*
-		 * Disable and unlink this counter.
-		 *
-		 * Be careful about zapping the list - IRQ/NMI context
-		 * could still be processing it:
-		 */
-		local_irq_save(flags);
-		perf_disable();
-
-		cpuctx = &__get_cpu_var(perf_cpu_context);
+	mutex_lock(&child_counter->mutex);
 
-		group_sched_out(child_counter, cpuctx, child_ctx);
-		update_counter_times(child_counter);
+	update_counter_times(child_counter);
+	list_del_counter(child_counter, child_ctx);
 
-		list_del_counter(child_counter, child_ctx);
-
-		perf_enable();
-		local_irq_restore(flags);
-	}
+	mutex_unlock(&child_counter->mutex);
 
 	parent_counter = child_counter->parent;
 	/*
@@ -3346,19 +3380,29 @@ __perf_counter_exit_task(struct task_struct *child,
  *
  * Note: we may be running in child context, but the PID is not hashed
  * anymore so new counters will not be added.
+ * (XXX not sure that is true when we get called from flush_old_exec.
+ *  -- paulus)
  */
 void perf_counter_exit_task(struct task_struct *child)
 {
 	struct perf_counter *child_counter, *tmp;
 	struct perf_counter_context *child_ctx;
+	unsigned long flags;
 
 	WARN_ON_ONCE(child != current);
 
-	child_ctx = &child->perf_counter_ctx;
+	child_ctx = child->perf_counter_ctxp;
 
-	if (likely(!child_ctx->nr_counters))
+	if (likely(!child_ctx))
 		return;
 
+	local_irq_save(flags);
+	__perf_counter_task_sched_out(child_ctx);
+	child->perf_counter_ctxp = NULL;
+	local_irq_restore(flags);
+
+	mutex_lock(&child_ctx->mutex);
+
again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
@@ -3371,6 +3415,10 @@ again:
 	 */
 	if (!list_empty(&child_ctx->counter_list))
 		goto again;
+
+	mutex_unlock(&child_ctx->mutex);
+
+	put_ctx(child_ctx);
 }
 
 /*
@@ -3382,19 +3430,25 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 
-	child_ctx = &child->perf_counter_ctx;
-	parent_ctx = &parent->perf_counter_ctx;
-
-	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = NULL;
 
 	/*
 	 * This is executed from the parent task context, so inherit
-	 * counters that have been marked for cloning:
+	 * counters that have been marked for cloning.
+	 * First allocate and initialize a context for the child.
 	 */
 
-	if (likely(!parent_ctx->nr_counters))
+	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+	if (!child_ctx)
+		return;
+
+	parent_ctx = parent->perf_counter_ctxp;
+	if (likely(!parent_ctx || !parent_ctx->nr_counters))
 		return;
 
+	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = child_ctx;
+
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
 	 * hashed yet and not running, so nobody can access it.