diff options
Diffstat (limited to 'kernel/perf_event.c')
| -rw-r--r-- | kernel/perf_event.c | 130 |
1 files changed, 107 insertions, 23 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index cb6c0d2af68f..2870feee81dd 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
| 32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
| 33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> |
| 34 | #include <linux/hw_breakpoint.h> | ||
| 34 | 35 | ||
| 35 | #include <asm/irq_regs.h> | 36 | #include <asm/irq_regs.h> |
| 36 | 37 | ||
| @@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
| 1286 | { | 1287 | { |
| 1287 | int ctxn; | 1288 | int ctxn; |
| 1288 | 1289 | ||
| 1289 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
| 1290 | |||
| 1291 | for_each_task_context_nr(ctxn) | 1290 | for_each_task_context_nr(ctxn) |
| 1292 | perf_event_context_sched_out(task, ctxn, next); | 1291 | perf_event_context_sched_out(task, ctxn, next); |
| 1293 | } | 1292 | } |
| @@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
| 1621 | { | 1620 | { |
| 1622 | raw_spin_lock(&ctx->lock); | 1621 | raw_spin_lock(&ctx->lock); |
| 1623 | 1622 | ||
| 1624 | /* Rotate the first entry last of non-pinned groups */ | 1623 | /* |
| 1625 | list_rotate_left(&ctx->flexible_groups); | 1624 | * Rotate the first entry last of non-pinned groups. Rotation might be |
| 1625 | * disabled by the inheritance code. | ||
| 1626 | */ | ||
| 1627 | if (!ctx->rotate_disable) | ||
| 1628 | list_rotate_left(&ctx->flexible_groups); | ||
| 1626 | 1629 | ||
| 1627 | raw_spin_unlock(&ctx->lock); | 1630 | raw_spin_unlock(&ctx->lock); |
| 1628 | } | 1631 | } |
| @@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
| 2234 | raw_spin_unlock_irq(&ctx->lock); | 2237 | raw_spin_unlock_irq(&ctx->lock); |
| 2235 | mutex_unlock(&ctx->mutex); | 2238 | mutex_unlock(&ctx->mutex); |
| 2236 | 2239 | ||
| 2237 | mutex_lock(&event->owner->perf_event_mutex); | ||
| 2238 | list_del_init(&event->owner_entry); | ||
| 2239 | mutex_unlock(&event->owner->perf_event_mutex); | ||
| 2240 | put_task_struct(event->owner); | ||
| 2241 | |||
| 2242 | free_event(event); | 2240 | free_event(event); |
| 2243 | 2241 | ||
| 2244 | return 0; | 2242 | return 0; |
| @@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
| 2251 | static int perf_release(struct inode *inode, struct file *file) | 2249 | static int perf_release(struct inode *inode, struct file *file) |
| 2252 | { | 2250 | { |
| 2253 | struct perf_event *event = file->private_data; | 2251 | struct perf_event *event = file->private_data; |
| 2252 | struct task_struct *owner; | ||
| 2254 | 2253 | ||
| 2255 | file->private_data = NULL; | 2254 | file->private_data = NULL; |
| 2256 | 2255 | ||
| 2256 | rcu_read_lock(); | ||
| 2257 | owner = ACCESS_ONCE(event->owner); | ||
| 2258 | /* | ||
| 2259 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe | ||
| 2260 | * !owner it means the list deletion is complete and we can indeed | ||
| 2261 | * free this event, otherwise we need to serialize on | ||
| 2262 | * owner->perf_event_mutex. | ||
| 2263 | */ | ||
| 2264 | smp_read_barrier_depends(); | ||
| 2265 | if (owner) { | ||
| 2266 | /* | ||
| 2267 | * Since delayed_put_task_struct() also drops the last | ||
| 2268 | * task reference we can safely take a new reference | ||
| 2269 | * while holding the rcu_read_lock(). | ||
| 2270 | */ | ||
| 2271 | get_task_struct(owner); | ||
| 2272 | } | ||
| 2273 | rcu_read_unlock(); | ||
| 2274 | |||
| 2275 | if (owner) { | ||
| 2276 | mutex_lock(&owner->perf_event_mutex); | ||
| 2277 | /* | ||
| 2278 | * We have to re-check the event->owner field, if it is cleared | ||
| 2279 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
| 2280 | * ensured they're done, and we can proceed with freeing the | ||
| 2281 | * event. | ||
| 2282 | */ | ||
| 2283 | if (event->owner) | ||
| 2284 | list_del_init(&event->owner_entry); | ||
| 2285 | mutex_unlock(&owner->perf_event_mutex); | ||
| 2286 | put_task_struct(owner); | ||
| 2287 | } | ||
| 2288 | |||
| 2257 | return perf_event_release_kernel(event); | 2289 | return perf_event_release_kernel(event); |
| 2258 | } | 2290 | } |
| 2259 | 2291 | ||
| @@ -3792,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
| 3792 | rcu_read_lock(); | 3824 | rcu_read_lock(); |
| 3793 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3825 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 3794 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3826 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| 3827 | if (cpuctx->active_pmu != pmu) | ||
| 3828 | goto next; | ||
| 3795 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3829 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
| 3796 | 3830 | ||
| 3797 | ctx = task_event->task_ctx; | 3831 | ctx = task_event->task_ctx; |
| @@ -3927,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 3927 | rcu_read_lock(); | 3961 | rcu_read_lock(); |
| 3928 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3962 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 3929 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3963 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| 3964 | if (cpuctx->active_pmu != pmu) | ||
| 3965 | goto next; | ||
| 3930 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3966 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
| 3931 | 3967 | ||
| 3932 | ctxn = pmu->task_ctx_nr; | 3968 | ctxn = pmu->task_ctx_nr; |
| @@ -4112,6 +4148,8 @@ got_name: | |||
| 4112 | rcu_read_lock(); | 4148 | rcu_read_lock(); |
| 4113 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4149 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 4114 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4150 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| 4151 | if (cpuctx->active_pmu != pmu) | ||
| 4152 | goto next; | ||
| 4115 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4153 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
| 4116 | vma->vm_flags & VM_EXEC); | 4154 | vma->vm_flags & VM_EXEC); |
| 4117 | 4155 | ||
| @@ -4681,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event) | |||
| 4681 | break; | 4719 | break; |
| 4682 | } | 4720 | } |
| 4683 | 4721 | ||
| 4684 | if (event_id > PERF_COUNT_SW_MAX) | 4722 | if (event_id >= PERF_COUNT_SW_MAX) |
| 4685 | return -ENOENT; | 4723 | return -ENOENT; |
| 4686 | 4724 | ||
| 4687 | if (!event->parent) { | 4725 | if (!event->parent) { |
| @@ -5113,20 +5151,36 @@ static void *find_pmu_context(int ctxn) | |||
| 5113 | return NULL; | 5151 | return NULL; |
| 5114 | } | 5152 | } |
| 5115 | 5153 | ||
| 5116 | static void free_pmu_context(void * __percpu cpu_context) | 5154 | static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) |
| 5117 | { | 5155 | { |
| 5118 | struct pmu *pmu; | 5156 | int cpu; |
| 5157 | |||
| 5158 | for_each_possible_cpu(cpu) { | ||
| 5159 | struct perf_cpu_context *cpuctx; | ||
| 5160 | |||
| 5161 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
| 5162 | |||
| 5163 | if (cpuctx->active_pmu == old_pmu) | ||
| 5164 | cpuctx->active_pmu = pmu; | ||
| 5165 | } | ||
| 5166 | } | ||
| 5167 | |||
| 5168 | static void free_pmu_context(struct pmu *pmu) | ||
| 5169 | { | ||
| 5170 | struct pmu *i; | ||
| 5119 | 5171 | ||
| 5120 | mutex_lock(&pmus_lock); | 5172 | mutex_lock(&pmus_lock); |
| 5121 | /* | 5173 | /* |
| 5122 | * Like a real lame refcount. | 5174 | * Like a real lame refcount. |
| 5123 | */ | 5175 | */ |
| 5124 | list_for_each_entry(pmu, &pmus, entry) { | 5176 | list_for_each_entry(i, &pmus, entry) { |
| 5125 | if (pmu->pmu_cpu_context == cpu_context) | 5177 | if (i->pmu_cpu_context == pmu->pmu_cpu_context) { |
| 5178 | update_pmu_context(i, pmu); | ||
| 5126 | goto out; | 5179 | goto out; |
| 5180 | } | ||
| 5127 | } | 5181 | } |
| 5128 | 5182 | ||
| 5129 | free_percpu(cpu_context); | 5183 | free_percpu(pmu->pmu_cpu_context); |
| 5130 | out: | 5184 | out: |
| 5131 | mutex_unlock(&pmus_lock); | 5185 | mutex_unlock(&pmus_lock); |
| 5132 | } | 5186 | } |
| @@ -5158,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu) | |||
| 5158 | cpuctx->ctx.pmu = pmu; | 5212 | cpuctx->ctx.pmu = pmu; |
| 5159 | cpuctx->jiffies_interval = 1; | 5213 | cpuctx->jiffies_interval = 1; |
| 5160 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5214 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
| 5215 | cpuctx->active_pmu = pmu; | ||
| 5161 | } | 5216 | } |
| 5162 | 5217 | ||
| 5163 | got_cpu_context: | 5218 | got_cpu_context: |
| @@ -5209,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
| 5209 | synchronize_rcu(); | 5264 | synchronize_rcu(); |
| 5210 | 5265 | ||
| 5211 | free_percpu(pmu->pmu_disable_count); | 5266 | free_percpu(pmu->pmu_disable_count); |
| 5212 | free_pmu_context(pmu->pmu_cpu_context); | 5267 | free_pmu_context(pmu); |
| 5213 | } | 5268 | } |
| 5214 | 5269 | ||
| 5215 | struct pmu *perf_init_event(struct perf_event *event) | 5270 | struct pmu *perf_init_event(struct perf_event *event) |
| @@ -5677,7 +5732,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5677 | mutex_unlock(&ctx->mutex); | 5732 | mutex_unlock(&ctx->mutex); |
| 5678 | 5733 | ||
| 5679 | event->owner = current; | 5734 | event->owner = current; |
| 5680 | get_task_struct(current); | 5735 | |
| 5681 | mutex_lock(&current->perf_event_mutex); | 5736 | mutex_lock(&current->perf_event_mutex); |
| 5682 | list_add_tail(&event->owner_entry, &current->perf_event_list); | 5737 | list_add_tail(&event->owner_entry, &current->perf_event_list); |
| 5683 | mutex_unlock(&current->perf_event_mutex); | 5738 | mutex_unlock(&current->perf_event_mutex); |
| @@ -5745,12 +5800,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
| 5745 | ++ctx->generation; | 5800 | ++ctx->generation; |
| 5746 | mutex_unlock(&ctx->mutex); | 5801 | mutex_unlock(&ctx->mutex); |
| 5747 | 5802 | ||
| 5748 | event->owner = current; | ||
| 5749 | get_task_struct(current); | ||
| 5750 | mutex_lock(&current->perf_event_mutex); | ||
| 5751 | list_add_tail(&event->owner_entry, &current->perf_event_list); | ||
| 5752 | mutex_unlock(&current->perf_event_mutex); | ||
| 5753 | |||
| 5754 | return event; | 5803 | return event; |
| 5755 | 5804 | ||
| 5756 | err_free: | 5805 | err_free: |
| @@ -5901,8 +5950,24 @@ again: | |||
| 5901 | */ | 5950 | */ |
| 5902 | void perf_event_exit_task(struct task_struct *child) | 5951 | void perf_event_exit_task(struct task_struct *child) |
| 5903 | { | 5952 | { |
| 5953 | struct perf_event *event, *tmp; | ||
| 5904 | int ctxn; | 5954 | int ctxn; |
| 5905 | 5955 | ||
| 5956 | mutex_lock(&child->perf_event_mutex); | ||
| 5957 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
| 5958 | owner_entry) { | ||
| 5959 | list_del_init(&event->owner_entry); | ||
| 5960 | |||
| 5961 | /* | ||
| 5962 | * Ensure the list deletion is visible before we clear | ||
| 5963 | * the owner, closes a race against perf_release() where | ||
| 5964 | * we need to serialize on the owner->perf_event_mutex. | ||
| 5965 | */ | ||
| 5966 | smp_wmb(); | ||
| 5967 | event->owner = NULL; | ||
| 5968 | } | ||
| 5969 | mutex_unlock(&child->perf_event_mutex); | ||
| 5970 | |||
| 5906 | for_each_task_context_nr(ctxn) | 5971 | for_each_task_context_nr(ctxn) |
| 5907 | perf_event_exit_task_context(child, ctxn); | 5972 | perf_event_exit_task_context(child, ctxn); |
| 5908 | } | 5973 | } |
| @@ -6122,6 +6187,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6122 | struct perf_event *event; | 6187 | struct perf_event *event; |
| 6123 | struct task_struct *parent = current; | 6188 | struct task_struct *parent = current; |
| 6124 | int inherited_all = 1; | 6189 | int inherited_all = 1; |
| 6190 | unsigned long flags; | ||
| 6125 | int ret = 0; | 6191 | int ret = 0; |
| 6126 | 6192 | ||
| 6127 | child->perf_event_ctxp[ctxn] = NULL; | 6193 | child->perf_event_ctxp[ctxn] = NULL; |
| @@ -6162,6 +6228,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6162 | break; | 6228 | break; |
| 6163 | } | 6229 | } |
| 6164 | 6230 | ||
| 6231 | /* | ||
| 6232 | * We can't hold ctx->lock when iterating the ->flexible_group list due | ||
| 6233 | * to allocations, but we need to prevent rotation because | ||
| 6234 | * rotate_ctx() will change the list from interrupt context. | ||
| 6235 | */ | ||
| 6236 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
| 6237 | parent_ctx->rotate_disable = 1; | ||
| 6238 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
| 6239 | |||
| 6165 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6240 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
| 6166 | ret = inherit_task_group(event, parent, parent_ctx, | 6241 | ret = inherit_task_group(event, parent, parent_ctx, |
| 6167 | child, ctxn, &inherited_all); | 6242 | child, ctxn, &inherited_all); |
| @@ -6169,6 +6244,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6169 | break; | 6244 | break; |
| 6170 | } | 6245 | } |
| 6171 | 6246 | ||
| 6247 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
| 6248 | parent_ctx->rotate_disable = 0; | ||
| 6249 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
| 6250 | |||
| 6172 | child_ctx = child->perf_event_ctxp[ctxn]; | 6251 | child_ctx = child->perf_event_ctxp[ctxn]; |
| 6173 | 6252 | ||
| 6174 | if (child_ctx && inherited_all) { | 6253 | if (child_ctx && inherited_all) { |
| @@ -6321,6 +6400,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
| 6321 | 6400 | ||
| 6322 | void __init perf_event_init(void) | 6401 | void __init perf_event_init(void) |
| 6323 | { | 6402 | { |
| 6403 | int ret; | ||
| 6404 | |||
| 6324 | perf_event_init_all_cpus(); | 6405 | perf_event_init_all_cpus(); |
| 6325 | init_srcu_struct(&pmus_srcu); | 6406 | init_srcu_struct(&pmus_srcu); |
| 6326 | perf_pmu_register(&perf_swevent); | 6407 | perf_pmu_register(&perf_swevent); |
| @@ -6328,4 +6409,7 @@ void __init perf_event_init(void) | |||
| 6328 | perf_pmu_register(&perf_task_clock); | 6409 | perf_pmu_register(&perf_task_clock); |
| 6329 | perf_tp_register(); | 6410 | perf_tp_register(); |
| 6330 | perf_cpu_notifier(perf_cpu_notify); | 6411 | perf_cpu_notifier(perf_cpu_notify); |
| 6412 | |||
| 6413 | ret = init_hw_breakpoint(); | ||
| 6414 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | ||
| 6331 | } | 6415 | } |
