-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c            |    7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c     |    3
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h     |    1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c |   20
-rw-r--r--  include/linux/perf_event.h                        |    9
-rw-r--r--  kernel/bpf/arraymap.c                             |   21
-rw-r--r--  kernel/events/core.c                              | 1199
-rw-r--r--  kernel/events/hw_breakpoint.c                     |    2
-rw-r--r--  kernel/events/ring_buffer.c                       |   40
-rw-r--r--  kernel/trace/bpf_trace.c                          |   14
-rw-r--r--  tools/perf/Makefile.perf                          |   25
-rw-r--r--  tools/perf/arch/x86/tests/intel-cqm.c             |    2
-rw-r--r--  tools/perf/config/Makefile                        |    4
-rw-r--r--  tools/perf/tests/make                             |   55
-rw-r--r--  tools/perf/ui/browsers/annotate.c                 |    4
-rw-r--r--  tools/perf/util/hist.c                            |    2
-rw-r--r--  tools/perf/util/session.c                         |    2
-rw-r--r--  tools/perf/util/stat.c                            |    1
-rw-r--r--  tools/perf/util/symbol.c                          |    2
19 files changed, 746 insertions, 667 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a667078a5180..fed2ab1f1065 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-        int alt_idx;
+        int alt_idx = idx;
+
         if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
                 return idx;
 
@@ -2897,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
                 return;
 
         if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-                void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
                 for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                         struct intel_shared_regs *pc;
 
                         pc = per_cpu(cpu_hw_events, i).shared_regs;
                         if (pc && pc->core_id == core_id) {
-                                *onln = cpuc->shared_regs;
+                                cpuc->kfree_on_online[0] = cpuc->shared_regs;
                                 cpuc->shared_regs = pc;
                                 break;
                         }
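The second hunk goes back to using slot 0 of kfree_on_online[] for the shared-regs copy. The surrounding code is the usual HT-sibling sharing idiom: the first CPU of a core keeps its allocation, later siblings adopt it and park their own copy for a deferred kfree(). A minimal sketch of that idiom, with hypothetical types standing in for cpu_hw_events and intel_shared_regs:

struct core_state {
        int core_id;
        /* ... per-core shared PMU register state ... */
};

/* hypothetical per-CPU bookkeeping, loosely modelled on cpu_hw_events */
struct cpu_state {
        struct core_state *shared;
        void *kfree_on_online[1];
};

static void adopt_sibling_state(int cpu, int core_id, struct cpu_state *self,
                                struct core_state *(*state_of)(int))
{
        int i;

        for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                struct core_state *pc = state_of(i);

                if (pc && pc->core_id == core_id) {
                        /* reuse the sibling's copy, free ours once it is safe */
                        self->kfree_on_online[0] = self->shared;
                        self->shared = pc;
                        break;
                }
        }
}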
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index f97f8075bf04..3bf41d413775 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
         case 87: /* Knights Landing */
                 ret = knl_uncore_pci_init();
                 break;
+        case 94: /* SkyLake */
+                ret = skl_uncore_pci_init();
+                break;
         default:
                 return 0;
         }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 07aa2d6bd710..a7086b862156 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 0b934820fafd..2bd030ddd0db 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC     0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC   0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC     0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC     0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK         0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
         { /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+        { /* IMC */
+                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+        },
+        { /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
         .name           = "snb_uncore",
         .id_table       = snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
         .id_table       = bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+        .name           = "skl_uncore",
+        .id_table       = skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
         __u32 pci_id;
         struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
         IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
         IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
         IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+        IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
         {  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
         return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+        return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
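Together with the uncore.c and uncore.h hunks above, Skylake client IMC support follows the standard recipe for a new client memory-controller uncore PMU. Condensed (SNB_PCI_UNCORE_IMC, UNCORE_PCI_DEV_DATA() and imc_uncore_pci_init() already exist in this file; the values are the ones added above):

/* 1) PCI device id of the new integrated memory controller */
#define PCI_DEVICE_ID_INTEL_SKL_IMC     0x191f

/* 2) id table plus a probe-less pci_driver for uncore_pci_init() to bind */
static const struct pci_device_id skl_uncore_pci_ids[] = {
        { /* IMC */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
        { /* end: all zeroes */ },
};

static struct pci_driver skl_uncore_pci_driver = {
        .name           = "skl_uncore",
        .id_table       = skl_uncore_pci_ids,
};

/* 3) map the device id to the driver in desktop_imc_pci_ids[]:
 *        IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),   (6th Gen Core)
 * 4) add the init hook, dispatched from uncore_pci_init() on CPU model 94:
 */
int skl_uncore_pci_init(void)
{
        return imc_uncore_pci_init();
}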
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a48f16a..b35a61a481fa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -634,9 +634,6 @@ struct perf_event_context {
         int                             nr_cgroups;     /* cgroup evts */
         void                            *task_ctx_data; /* pmu specific data */
         struct rcu_head                 rcu_head;
-
-        struct delayed_work             orphans_remove;
-        bool                            orphans_remove_sched;
 };
 
 /*
@@ -729,7 +726,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1044,7 +1041,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1070,7 +1067,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)      { }
 static inline void perf_event_free_task(struct task_struct *task)       { }
 static inline void perf_event_delayed_put(struct task_struct *task)     { }
-static inline struct perf_event *perf_event_get(unsigned int fd)        { return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)              { return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
         return ERR_PTR(-EINVAL);
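Two exported interfaces change here: perf_event_get() now returns the struct file rather than the perf_event (the arraymap.c hunks below adapt to that), and the __perf_event_disable() cross-call entry point is replaced by perf_event_disable_local(), which disables an event on the current CPU with IRQs already off. The two-line hw_breakpoint.c change counted in the diffstat is not shown in this section, but the adapted call site in modify_user_hw_breakpoint() presumably reduces to roughly:

        /* sketch: IRQs may already be off here, so an IPI is not an option */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
                perf_event_disable_local(bp);
        else
                perf_event_disable(bp);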
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b0799bced518..89ebbc4d1164 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
         struct perf_event *event;
         const struct perf_event_attr *attr;
+        struct file *file;
 
-        event = perf_event_get(fd);
-        if (IS_ERR(event))
-                return event;
+        file = perf_event_get(fd);
+        if (IS_ERR(file))
+                return file;
+
+        event = file->private_data;
 
         attr = perf_event_attrs(event);
         if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
                 goto err;
 
         if (attr->type == PERF_TYPE_RAW)
-                return event;
+                return file;
 
         if (attr->type == PERF_TYPE_HARDWARE)
-                return event;
+                return file;
 
         if (attr->type == PERF_TYPE_SOFTWARE &&
             attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-                return event;
+                return file;
 err:
-        perf_event_release_kernel(event);
+        fput(file);
         return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-        struct perf_event *event = ptr;
-
-        perf_event_release_kernel(event);
+        fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
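The array map now pins each slot's event through an ordinary file reference: perf_event_fd_array_get_ptr() stores the struct file, the event itself is reached via file->private_data, and releasing a slot is a plain fput() instead of perf_event_release_kernel(). A hedged sketch of the consumer side (the real lookup lives in kernel/trace/bpf_trace.c, 14 lines in the diffstat, not shown here):

/* sketch: resolving the perf_event behind a perf_event_array slot */
static struct perf_event *event_from_slot(struct bpf_array *array, u64 index)
{
        struct file *file = array->ptrs[index]; /* set by ..._get_ptr() above */

        if (!file)
                return NULL;

        /* a perf fd's file->private_data is the perf_event itself */
        return file->private_data;
}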
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 06ae52e99ac2..5946460b2425 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,8 +49,6 @@
49 49
50#include <asm/irq_regs.h> 50#include <asm/irq_regs.h>
51 51
52static struct workqueue_struct *perf_wq;
53
54typedef int (*remote_function_f)(void *); 52typedef int (*remote_function_f)(void *);
55 53
56struct remote_function_call { 54struct remote_function_call {
@@ -126,44 +124,181 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
126 return data.ret; 124 return data.ret;
127} 125}
128 126
129static void event_function_call(struct perf_event *event, 127static inline struct perf_cpu_context *
130 int (*active)(void *), 128__get_cpu_context(struct perf_event_context *ctx)
131 void (*inactive)(void *), 129{
132 void *data) 130 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
131}
132
133static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
134 struct perf_event_context *ctx)
135{
136 raw_spin_lock(&cpuctx->ctx.lock);
137 if (ctx)
138 raw_spin_lock(&ctx->lock);
139}
140
141static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
142 struct perf_event_context *ctx)
143{
144 if (ctx)
145 raw_spin_unlock(&ctx->lock);
146 raw_spin_unlock(&cpuctx->ctx.lock);
147}
148
149#define TASK_TOMBSTONE ((void *)-1L)
150
151static bool is_kernel_event(struct perf_event *event)
152{
153 return READ_ONCE(event->owner) == TASK_TOMBSTONE;
154}
155
156/*
157 * On task ctx scheduling...
158 *
159 * When !ctx->nr_events a task context will not be scheduled. This means
160 * we can disable the scheduler hooks (for performance) without leaving
161 * pending task ctx state.
162 *
163 * This however results in two special cases:
164 *
165 * - removing the last event from a task ctx; this is relatively straight
166 * forward and is done in __perf_remove_from_context.
167 *
168 * - adding the first event to a task ctx; this is tricky because we cannot
169 * rely on ctx->is_active and therefore cannot use event_function_call().
170 * See perf_install_in_context().
171 *
172 * This is because we need a ctx->lock serialized variable (ctx->is_active)
173 * to reliably determine if a particular task/context is scheduled in. The
174 * task_curr() use in task_function_call() is racy in that a remote context
175 * switch is not a single atomic operation.
176 *
177 * As is, the situation is 'safe' because we set rq->curr before we do the
178 * actual context switch. This means that task_curr() will fail early, but
179 * we'll continue spinning on ctx->is_active until we've passed
180 * perf_event_task_sched_out().
181 *
182 * Without this ctx->lock serialized variable we could have race where we find
183 * the task (and hence the context) would not be active while in fact they are.
184 *
185 * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
186 */
187
188typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
189 struct perf_event_context *, void *);
190
191struct event_function_struct {
192 struct perf_event *event;
193 event_f func;
194 void *data;
195};
196
197static int event_function(void *info)
198{
199 struct event_function_struct *efs = info;
200 struct perf_event *event = efs->event;
201 struct perf_event_context *ctx = event->ctx;
202 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
203 struct perf_event_context *task_ctx = cpuctx->task_ctx;
204 int ret = 0;
205
206 WARN_ON_ONCE(!irqs_disabled());
207
208 perf_ctx_lock(cpuctx, task_ctx);
209 /*
210 * Since we do the IPI call without holding ctx->lock things can have
211 * changed, double check we hit the task we set out to hit.
212 */
213 if (ctx->task) {
214 if (ctx->task != current) {
215 ret = -EAGAIN;
216 goto unlock;
217 }
218
219 /*
220 * We only use event_function_call() on established contexts,
221 * and event_function() is only ever called when active (or
222 * rather, we'll have bailed in task_function_call() or the
223 * above ctx->task != current test), therefore we must have
224 * ctx->is_active here.
225 */
226 WARN_ON_ONCE(!ctx->is_active);
227 /*
228 * And since we have ctx->is_active, cpuctx->task_ctx must
229 * match.
230 */
231 WARN_ON_ONCE(task_ctx != ctx);
232 } else {
233 WARN_ON_ONCE(&cpuctx->ctx != ctx);
234 }
235
236 efs->func(event, cpuctx, ctx, efs->data);
237unlock:
238 perf_ctx_unlock(cpuctx, task_ctx);
239
240 return ret;
241}
242
243static void event_function_local(struct perf_event *event, event_f func, void *data)
244{
245 struct event_function_struct efs = {
246 .event = event,
247 .func = func,
248 .data = data,
249 };
250
251 int ret = event_function(&efs);
252 WARN_ON_ONCE(ret);
253}
254
255static void event_function_call(struct perf_event *event, event_f func, void *data)
133{ 256{
134 struct perf_event_context *ctx = event->ctx; 257 struct perf_event_context *ctx = event->ctx;
135 struct task_struct *task = ctx->task; 258 struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
259 struct event_function_struct efs = {
260 .event = event,
261 .func = func,
262 .data = data,
263 };
264
265 if (!event->parent) {
266 /*
267 * If this is a !child event, we must hold ctx::mutex to
268 * stabilize the the event->ctx relation. See
269 * perf_event_ctx_lock().
270 */
271 lockdep_assert_held(&ctx->mutex);
272 }
136 273
137 if (!task) { 274 if (!task) {
138 cpu_function_call(event->cpu, active, data); 275 cpu_function_call(event->cpu, event_function, &efs);
139 return; 276 return;
140 } 277 }
141 278
142again: 279again:
143 if (!task_function_call(task, active, data)) 280 if (task == TASK_TOMBSTONE)
281 return;
282
283 if (!task_function_call(task, event_function, &efs))
144 return; 284 return;
145 285
146 raw_spin_lock_irq(&ctx->lock); 286 raw_spin_lock_irq(&ctx->lock);
147 if (ctx->is_active) { 287 /*
148 /* 288 * Reload the task pointer, it might have been changed by
149 * Reload the task pointer, it might have been changed by 289 * a concurrent perf_event_context_sched_out().
150 * a concurrent perf_event_context_sched_out(). 290 */
151 */ 291 task = ctx->task;
152 task = ctx->task; 292 if (task != TASK_TOMBSTONE) {
153 raw_spin_unlock_irq(&ctx->lock); 293 if (ctx->is_active) {
154 goto again; 294 raw_spin_unlock_irq(&ctx->lock);
295 goto again;
296 }
297 func(event, NULL, ctx, data);
155 } 298 }
156 inactive(data);
157 raw_spin_unlock_irq(&ctx->lock); 299 raw_spin_unlock_irq(&ctx->lock);
158} 300}
159 301
160#define EVENT_OWNER_KERNEL ((void *) -1)
161
162static bool is_kernel_event(struct perf_event *event)
163{
164 return event->owner == EVENT_OWNER_KERNEL;
165}
166
167#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ 302#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
168 PERF_FLAG_FD_OUTPUT |\ 303 PERF_FLAG_FD_OUTPUT |\
169 PERF_FLAG_PID_CGROUP |\ 304 PERF_FLAG_PID_CGROUP |\
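The new event_function_call() above replaces the old pair of 'active' (cross-call) and 'inactive' (direct-call) callbacks with a single event_f plus a retry loop. Rewritten as an explicit loop instead of the goto, the control flow is (a restatement for clarity, using the functions defined above):

static void event_function_call_sketch(struct perf_event *event, event_f func,
                                       void *data)
{
        struct perf_event_context *ctx = event->ctx;
        struct task_struct *task = READ_ONCE(ctx->task);
        struct event_function_struct efs = {
                .event = event, .func = func, .data = data,
        };

        if (!task) {                    /* CPU context: IPI the target CPU */
                cpu_function_call(event->cpu, event_function, &efs);
                return;
        }

        for (;;) {
                if (task == TASK_TOMBSTONE)     /* the task has exited */
                        return;

                /* run func via IPI on the CPU where the task is current */
                if (!task_function_call(task, event_function, &efs))
                        return;

                raw_spin_lock_irq(&ctx->lock);
                task = ctx->task;       /* may have changed while unlocked */
                if (task != TASK_TOMBSTONE && !ctx->is_active) {
                        /* not scheduled in anywhere: run func locally */
                        func(event, NULL, ctx, data);
                        raw_spin_unlock_irq(&ctx->lock);
                        return;
                }
                raw_spin_unlock_irq(&ctx->lock);
                /* still active (or tombstoned): re-check and retry the IPI */
        }
}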
@@ -368,28 +503,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
368 return event->clock(); 503 return event->clock();
369} 504}
370 505
371static inline struct perf_cpu_context *
372__get_cpu_context(struct perf_event_context *ctx)
373{
374 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
375}
376
377static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
378 struct perf_event_context *ctx)
379{
380 raw_spin_lock(&cpuctx->ctx.lock);
381 if (ctx)
382 raw_spin_lock(&ctx->lock);
383}
384
385static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
386 struct perf_event_context *ctx)
387{
388 if (ctx)
389 raw_spin_unlock(&ctx->lock);
390 raw_spin_unlock(&cpuctx->ctx.lock);
391}
392
393#ifdef CONFIG_CGROUP_PERF 506#ifdef CONFIG_CGROUP_PERF
394 507
395static inline bool 508static inline bool
@@ -579,13 +692,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
579 * we are holding the rcu lock 692 * we are holding the rcu lock
580 */ 693 */
581 cgrp1 = perf_cgroup_from_task(task, NULL); 694 cgrp1 = perf_cgroup_from_task(task, NULL);
582 695 cgrp2 = perf_cgroup_from_task(next, NULL);
583 /*
584 * next is NULL when called from perf_event_enable_on_exec()
585 * that will systematically cause a cgroup_switch()
586 */
587 if (next)
588 cgrp2 = perf_cgroup_from_task(next, NULL);
589 696
590 /* 697 /*
591 * only schedule out current cgroup events if we know 698 * only schedule out current cgroup events if we know
@@ -611,8 +718,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
611 * we are holding the rcu lock 718 * we are holding the rcu lock
612 */ 719 */
613 cgrp1 = perf_cgroup_from_task(task, NULL); 720 cgrp1 = perf_cgroup_from_task(task, NULL);
614
615 /* prev can never be NULL */
616 cgrp2 = perf_cgroup_from_task(prev, NULL); 721 cgrp2 = perf_cgroup_from_task(prev, NULL);
617 722
618 /* 723 /*
@@ -917,7 +1022,7 @@ static void put_ctx(struct perf_event_context *ctx)
917 if (atomic_dec_and_test(&ctx->refcount)) { 1022 if (atomic_dec_and_test(&ctx->refcount)) {
918 if (ctx->parent_ctx) 1023 if (ctx->parent_ctx)
919 put_ctx(ctx->parent_ctx); 1024 put_ctx(ctx->parent_ctx);
920 if (ctx->task) 1025 if (ctx->task && ctx->task != TASK_TOMBSTONE)
921 put_task_struct(ctx->task); 1026 put_task_struct(ctx->task);
922 call_rcu(&ctx->rcu_head, free_ctx); 1027 call_rcu(&ctx->rcu_head, free_ctx);
923 } 1028 }
@@ -934,9 +1039,8 @@ static void put_ctx(struct perf_event_context *ctx)
934 * perf_event_context::mutex nests and those are: 1039 * perf_event_context::mutex nests and those are:
935 * 1040 *
936 * - perf_event_exit_task_context() [ child , 0 ] 1041 * - perf_event_exit_task_context() [ child , 0 ]
937 * __perf_event_exit_task() 1042 * perf_event_exit_event()
938 * sync_child_event() 1043 * put_event() [ parent, 1 ]
939 * put_event() [ parent, 1 ]
940 * 1044 *
941 * - perf_event_init_context() [ parent, 0 ] 1045 * - perf_event_init_context() [ parent, 0 ]
942 * inherit_task_group() 1046 * inherit_task_group()
@@ -979,8 +1083,8 @@ static void put_ctx(struct perf_event_context *ctx)
979 * Lock order: 1083 * Lock order:
980 * task_struct::perf_event_mutex 1084 * task_struct::perf_event_mutex
981 * perf_event_context::mutex 1085 * perf_event_context::mutex
982 * perf_event_context::lock
983 * perf_event::child_mutex; 1086 * perf_event::child_mutex;
1087 * perf_event_context::lock
984 * perf_event::mmap_mutex 1088 * perf_event::mmap_mutex
985 * mmap_sem 1089 * mmap_sem
986 */ 1090 */
@@ -1078,6 +1182,7 @@ static u64 primary_event_id(struct perf_event *event)
1078 1182
1079/* 1183/*
1080 * Get the perf_event_context for a task and lock it. 1184 * Get the perf_event_context for a task and lock it.
1185 *
1081 * This has to cope with with the fact that until it is locked, 1186 * This has to cope with with the fact that until it is locked,
1082 * the context could get moved to another task. 1187 * the context could get moved to another task.
1083 */ 1188 */
@@ -1118,9 +1223,12 @@ retry:
1118 goto retry; 1223 goto retry;
1119 } 1224 }
1120 1225
1121 if (!atomic_inc_not_zero(&ctx->refcount)) { 1226 if (ctx->task == TASK_TOMBSTONE ||
1227 !atomic_inc_not_zero(&ctx->refcount)) {
1122 raw_spin_unlock(&ctx->lock); 1228 raw_spin_unlock(&ctx->lock);
1123 ctx = NULL; 1229 ctx = NULL;
1230 } else {
1231 WARN_ON_ONCE(ctx->task != task);
1124 } 1232 }
1125 } 1233 }
1126 rcu_read_unlock(); 1234 rcu_read_unlock();
@@ -1246,6 +1354,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
1246static void 1354static void
1247list_add_event(struct perf_event *event, struct perf_event_context *ctx) 1355list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1248{ 1356{
1357 lockdep_assert_held(&ctx->lock);
1358
1249 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); 1359 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
1250 event->attach_state |= PERF_ATTACH_CONTEXT; 1360 event->attach_state |= PERF_ATTACH_CONTEXT;
1251 1361
@@ -1448,11 +1558,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1448 1558
1449 if (is_cgroup_event(event)) { 1559 if (is_cgroup_event(event)) {
1450 ctx->nr_cgroups--; 1560 ctx->nr_cgroups--;
1561 /*
1562 * Because cgroup events are always per-cpu events, this will
1563 * always be called from the right CPU.
1564 */
1451 cpuctx = __get_cpu_context(ctx); 1565 cpuctx = __get_cpu_context(ctx);
1452 /* 1566 /*
1453 * if there are no more cgroup events 1567 * If there are no more cgroup events then clear cgrp to avoid
1454 * then cler cgrp to avoid stale pointer 1568 * stale pointer in update_cgrp_time_from_cpuctx().
1455 * in update_cgrp_time_from_cpuctx()
1456 */ 1569 */
1457 if (!ctx->nr_cgroups) 1570 if (!ctx->nr_cgroups)
1458 cpuctx->cgrp = NULL; 1571 cpuctx->cgrp = NULL;
@@ -1530,45 +1643,11 @@ out:
1530 perf_event__header_size(tmp); 1643 perf_event__header_size(tmp);
1531} 1644}
1532 1645
1533/*
1534 * User event without the task.
1535 */
1536static bool is_orphaned_event(struct perf_event *event) 1646static bool is_orphaned_event(struct perf_event *event)
1537{ 1647{
1538 return event && !is_kernel_event(event) && !event->owner; 1648 return event->state == PERF_EVENT_STATE_EXIT;
1539} 1649}
1540 1650
1541/*
1542 * Event has a parent but parent's task finished and it's
1543 * alive only because of children holding refference.
1544 */
1545static bool is_orphaned_child(struct perf_event *event)
1546{
1547 return is_orphaned_event(event->parent);
1548}
1549
1550static void orphans_remove_work(struct work_struct *work);
1551
1552static void schedule_orphans_remove(struct perf_event_context *ctx)
1553{
1554 if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
1555 return;
1556
1557 if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
1558 get_ctx(ctx);
1559 ctx->orphans_remove_sched = true;
1560 }
1561}
1562
1563static int __init perf_workqueue_init(void)
1564{
1565 perf_wq = create_singlethread_workqueue("perf");
1566 WARN(!perf_wq, "failed to create perf workqueue\n");
1567 return perf_wq ? 0 : -1;
1568}
1569
1570core_initcall(perf_workqueue_init);
1571
1572static inline int pmu_filter_match(struct perf_event *event) 1651static inline int pmu_filter_match(struct perf_event *event)
1573{ 1652{
1574 struct pmu *pmu = event->pmu; 1653 struct pmu *pmu = event->pmu;
@@ -1629,9 +1708,6 @@ event_sched_out(struct perf_event *event,
1629 if (event->attr.exclusive || !cpuctx->active_oncpu) 1708 if (event->attr.exclusive || !cpuctx->active_oncpu)
1630 cpuctx->exclusive = 0; 1709 cpuctx->exclusive = 0;
1631 1710
1632 if (is_orphaned_child(event))
1633 schedule_orphans_remove(ctx);
1634
1635 perf_pmu_enable(event->pmu); 1711 perf_pmu_enable(event->pmu);
1636} 1712}
1637 1713
@@ -1655,21 +1731,8 @@ group_sched_out(struct perf_event *group_event,
1655 cpuctx->exclusive = 0; 1731 cpuctx->exclusive = 0;
1656} 1732}
1657 1733
1658struct remove_event { 1734#define DETACH_GROUP 0x01UL
1659 struct perf_event *event; 1735#define DETACH_STATE 0x02UL
1660 bool detach_group;
1661};
1662
1663static void ___perf_remove_from_context(void *info)
1664{
1665 struct remove_event *re = info;
1666 struct perf_event *event = re->event;
1667 struct perf_event_context *ctx = event->ctx;
1668
1669 if (re->detach_group)
1670 perf_group_detach(event);
1671 list_del_event(event, ctx);
1672}
1673 1736
1674/* 1737/*
1675 * Cross CPU call to remove a performance event 1738 * Cross CPU call to remove a performance event
@@ -1677,33 +1740,33 @@ static void ___perf_remove_from_context(void *info)
1677 * We disable the event on the hardware level first. After that we 1740 * We disable the event on the hardware level first. After that we
1678 * remove it from the context list. 1741 * remove it from the context list.
1679 */ 1742 */
1680static int __perf_remove_from_context(void *info) 1743static void
1744__perf_remove_from_context(struct perf_event *event,
1745 struct perf_cpu_context *cpuctx,
1746 struct perf_event_context *ctx,
1747 void *info)
1681{ 1748{
1682 struct remove_event *re = info; 1749 unsigned long flags = (unsigned long)info;
1683 struct perf_event *event = re->event;
1684 struct perf_event_context *ctx = event->ctx;
1685 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1686 1750
1687 raw_spin_lock(&ctx->lock);
1688 event_sched_out(event, cpuctx, ctx); 1751 event_sched_out(event, cpuctx, ctx);
1689 if (re->detach_group) 1752 if (flags & DETACH_GROUP)
1690 perf_group_detach(event); 1753 perf_group_detach(event);
1691 list_del_event(event, ctx); 1754 list_del_event(event, ctx);
1692 if (!ctx->nr_events && cpuctx->task_ctx == ctx) { 1755 if (flags & DETACH_STATE)
1756 event->state = PERF_EVENT_STATE_EXIT;
1757
1758 if (!ctx->nr_events && ctx->is_active) {
1693 ctx->is_active = 0; 1759 ctx->is_active = 0;
1694 cpuctx->task_ctx = NULL; 1760 if (ctx->task) {
1761 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
1762 cpuctx->task_ctx = NULL;
1763 }
1695 } 1764 }
1696 raw_spin_unlock(&ctx->lock);
1697
1698 return 0;
1699} 1765}
1700 1766
1701/* 1767/*
1702 * Remove the event from a task's (or a CPU's) list of events. 1768 * Remove the event from a task's (or a CPU's) list of events.
1703 * 1769 *
1704 * CPU events are removed with a smp call. For task events we only
1705 * call when the task is on a CPU.
1706 *
1707 * If event->ctx is a cloned context, callers must make sure that 1770 * If event->ctx is a cloned context, callers must make sure that
1708 * every task struct that event->ctx->task could possibly point to 1771 * every task struct that event->ctx->task could possibly point to
1709 * remains valid. This is OK when called from perf_release since 1772 * remains valid. This is OK when called from perf_release since
@@ -1711,73 +1774,32 @@ static int __perf_remove_from_context(void *info)
1711 * When called from perf_event_exit_task, it's OK because the 1774 * When called from perf_event_exit_task, it's OK because the
1712 * context has been detached from its task. 1775 * context has been detached from its task.
1713 */ 1776 */
1714static void perf_remove_from_context(struct perf_event *event, bool detach_group) 1777static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
1715{ 1778{
1716 struct perf_event_context *ctx = event->ctx; 1779 lockdep_assert_held(&event->ctx->mutex);
1717 struct remove_event re = {
1718 .event = event,
1719 .detach_group = detach_group,
1720 };
1721 1780
1722 lockdep_assert_held(&ctx->mutex); 1781 event_function_call(event, __perf_remove_from_context, (void *)flags);
1723
1724 event_function_call(event, __perf_remove_from_context,
1725 ___perf_remove_from_context, &re);
1726} 1782}
1727 1783
1728/* 1784/*
1729 * Cross CPU call to disable a performance event 1785 * Cross CPU call to disable a performance event
1730 */ 1786 */
1731int __perf_event_disable(void *info) 1787static void __perf_event_disable(struct perf_event *event,
1732{ 1788 struct perf_cpu_context *cpuctx,
1733 struct perf_event *event = info; 1789 struct perf_event_context *ctx,
1734 struct perf_event_context *ctx = event->ctx; 1790 void *info)
1735 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1736
1737 /*
1738 * If this is a per-task event, need to check whether this
1739 * event's task is the current task on this cpu.
1740 *
1741 * Can trigger due to concurrent perf_event_context_sched_out()
1742 * flipping contexts around.
1743 */
1744 if (ctx->task && cpuctx->task_ctx != ctx)
1745 return -EINVAL;
1746
1747 raw_spin_lock(&ctx->lock);
1748
1749 /*
1750 * If the event is on, turn it off.
1751 * If it is in error state, leave it in error state.
1752 */
1753 if (event->state >= PERF_EVENT_STATE_INACTIVE) {
1754 update_context_time(ctx);
1755 update_cgrp_time_from_event(event);
1756 update_group_times(event);
1757 if (event == event->group_leader)
1758 group_sched_out(event, cpuctx, ctx);
1759 else
1760 event_sched_out(event, cpuctx, ctx);
1761 event->state = PERF_EVENT_STATE_OFF;
1762 }
1763
1764 raw_spin_unlock(&ctx->lock);
1765
1766 return 0;
1767}
1768
1769void ___perf_event_disable(void *info)
1770{ 1791{
1771 struct perf_event *event = info; 1792 if (event->state < PERF_EVENT_STATE_INACTIVE)
1793 return;
1772 1794
1773 /* 1795 update_context_time(ctx);
1774 * Since we have the lock this context can't be scheduled 1796 update_cgrp_time_from_event(event);
1775 * in, so we can change the state safely. 1797 update_group_times(event);
1776 */ 1798 if (event == event->group_leader)
1777 if (event->state == PERF_EVENT_STATE_INACTIVE) { 1799 group_sched_out(event, cpuctx, ctx);
1778 update_group_times(event); 1800 else
1779 event->state = PERF_EVENT_STATE_OFF; 1801 event_sched_out(event, cpuctx, ctx);
1780 } 1802 event->state = PERF_EVENT_STATE_OFF;
1781} 1803}
1782 1804
1783/* 1805/*
@@ -1788,7 +1810,8 @@ void ___perf_event_disable(void *info)
1788 * remains valid. This condition is satisifed when called through 1810 * remains valid. This condition is satisifed when called through
1789 * perf_event_for_each_child or perf_event_for_each because they 1811 * perf_event_for_each_child or perf_event_for_each because they
1790 * hold the top-level event's child_mutex, so any descendant that 1812 * hold the top-level event's child_mutex, so any descendant that
1791 * goes to exit will block in sync_child_event. 1813 * goes to exit will block in perf_event_exit_event().
1814 *
1792 * When called from perf_pending_event it's OK because event->ctx 1815 * When called from perf_pending_event it's OK because event->ctx
1793 * is the current context on this CPU and preemption is disabled, 1816 * is the current context on this CPU and preemption is disabled,
1794 * hence we can't get into perf_event_task_sched_out for this context. 1817 * hence we can't get into perf_event_task_sched_out for this context.
@@ -1804,8 +1827,12 @@ static void _perf_event_disable(struct perf_event *event)
1804 } 1827 }
1805 raw_spin_unlock_irq(&ctx->lock); 1828 raw_spin_unlock_irq(&ctx->lock);
1806 1829
1807 event_function_call(event, __perf_event_disable, 1830 event_function_call(event, __perf_event_disable, NULL);
1808 ___perf_event_disable, event); 1831}
1832
1833void perf_event_disable_local(struct perf_event *event)
1834{
1835 event_function_local(event, __perf_event_disable, NULL);
1809} 1836}
1810 1837
1811/* 1838/*
@@ -1918,9 +1945,6 @@ event_sched_in(struct perf_event *event,
1918 if (event->attr.exclusive) 1945 if (event->attr.exclusive)
1919 cpuctx->exclusive = 1; 1946 cpuctx->exclusive = 1;
1920 1947
1921 if (is_orphaned_child(event))
1922 schedule_orphans_remove(ctx);
1923
1924out: 1948out:
1925 perf_pmu_enable(event->pmu); 1949 perf_pmu_enable(event->pmu);
1926 1950
@@ -2039,7 +2063,8 @@ static void add_event_to_ctx(struct perf_event *event,
2039 event->tstamp_stopped = tstamp; 2063 event->tstamp_stopped = tstamp;
2040} 2064}
2041 2065
2042static void task_ctx_sched_out(struct perf_event_context *ctx); 2066static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
2067 struct perf_event_context *ctx);
2043static void 2068static void
2044ctx_sched_in(struct perf_event_context *ctx, 2069ctx_sched_in(struct perf_event_context *ctx,
2045 struct perf_cpu_context *cpuctx, 2070 struct perf_cpu_context *cpuctx,
@@ -2058,16 +2083,15 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
2058 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); 2083 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
2059} 2084}
2060 2085
2061static void ___perf_install_in_context(void *info) 2086static void ctx_resched(struct perf_cpu_context *cpuctx,
2087 struct perf_event_context *task_ctx)
2062{ 2088{
2063 struct perf_event *event = info; 2089 perf_pmu_disable(cpuctx->ctx.pmu);
2064 struct perf_event_context *ctx = event->ctx; 2090 if (task_ctx)
2065 2091 task_ctx_sched_out(cpuctx, task_ctx);
2066 /* 2092 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
2067 * Since the task isn't running, its safe to add the event, us holding 2093 perf_event_sched_in(cpuctx, task_ctx, current);
2068 * the ctx->lock ensures the task won't get scheduled in. 2094 perf_pmu_enable(cpuctx->ctx.pmu);
2069 */
2070 add_event_to_ctx(event, ctx);
2071} 2095}
2072 2096
2073/* 2097/*
@@ -2077,55 +2101,31 @@ static void ___perf_install_in_context(void *info)
2077 */ 2101 */
2078static int __perf_install_in_context(void *info) 2102static int __perf_install_in_context(void *info)
2079{ 2103{
2080 struct perf_event *event = info; 2104 struct perf_event_context *ctx = info;
2081 struct perf_event_context *ctx = event->ctx;
2082 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 2105 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2083 struct perf_event_context *task_ctx = cpuctx->task_ctx; 2106 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2084 struct task_struct *task = current;
2085
2086 perf_ctx_lock(cpuctx, task_ctx);
2087 perf_pmu_disable(cpuctx->ctx.pmu);
2088
2089 /*
2090 * If there was an active task_ctx schedule it out.
2091 */
2092 if (task_ctx)
2093 task_ctx_sched_out(task_ctx);
2094 2107
2095 /* 2108 raw_spin_lock(&cpuctx->ctx.lock);
2096 * If the context we're installing events in is not the 2109 if (ctx->task) {
2097 * active task_ctx, flip them.
2098 */
2099 if (ctx->task && task_ctx != ctx) {
2100 if (task_ctx)
2101 raw_spin_unlock(&task_ctx->lock);
2102 raw_spin_lock(&ctx->lock); 2110 raw_spin_lock(&ctx->lock);
2111 /*
2112 * If we hit the 'wrong' task, we've since scheduled and
2113 * everything should be sorted, nothing to do!
2114 */
2103 task_ctx = ctx; 2115 task_ctx = ctx;
2104 } 2116 if (ctx->task != current)
2117 goto unlock;
2105 2118
2106 if (task_ctx) { 2119 /*
2107 cpuctx->task_ctx = task_ctx; 2120 * If task_ctx is set, it had better be to us.
2108 task = task_ctx->task; 2121 */
2122 WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
2123 } else if (task_ctx) {
2124 raw_spin_lock(&task_ctx->lock);
2109 } 2125 }
2110 2126
2111 cpu_ctx_sched_out(cpuctx, EVENT_ALL); 2127 ctx_resched(cpuctx, task_ctx);
2112 2128unlock:
2113 update_context_time(ctx);
2114 /*
2115 * update cgrp time only if current cgrp
2116 * matches event->cgrp. Must be done before
2117 * calling add_event_to_ctx()
2118 */
2119 update_cgrp_time_from_event(event);
2120
2121 add_event_to_ctx(event, ctx);
2122
2123 /*
2124 * Schedule everything back in
2125 */
2126 perf_event_sched_in(cpuctx, task_ctx, task);
2127
2128 perf_pmu_enable(cpuctx->ctx.pmu);
2129 perf_ctx_unlock(cpuctx, task_ctx); 2129 perf_ctx_unlock(cpuctx, task_ctx);
2130 2130
2131 return 0; 2131 return 0;
@@ -2133,27 +2133,54 @@ static int __perf_install_in_context(void *info)
2133 2133
2134/* 2134/*
2135 * Attach a performance event to a context 2135 * Attach a performance event to a context
2136 *
2137 * First we add the event to the list with the hardware enable bit
2138 * in event->hw_config cleared.
2139 *
2140 * If the event is attached to a task which is on a CPU we use a smp
2141 * call to enable it in the task context. The task might have been
2142 * scheduled away, but we check this in the smp call again.
2143 */ 2136 */
2144static void 2137static void
2145perf_install_in_context(struct perf_event_context *ctx, 2138perf_install_in_context(struct perf_event_context *ctx,
2146 struct perf_event *event, 2139 struct perf_event *event,
2147 int cpu) 2140 int cpu)
2148{ 2141{
2142 struct task_struct *task = NULL;
2143
2149 lockdep_assert_held(&ctx->mutex); 2144 lockdep_assert_held(&ctx->mutex);
2150 2145
2151 event->ctx = ctx; 2146 event->ctx = ctx;
2152 if (event->cpu != -1) 2147 if (event->cpu != -1)
2153 event->cpu = cpu; 2148 event->cpu = cpu;
2154 2149
2155 event_function_call(event, __perf_install_in_context, 2150 /*
2156 ___perf_install_in_context, event); 2151 * Installing events is tricky because we cannot rely on ctx->is_active
2152 * to be set in case this is the nr_events 0 -> 1 transition.
2153 *
2154 * So what we do is we add the event to the list here, which will allow
2155 * a future context switch to DTRT and then send a racy IPI. If the IPI
2156 * fails to hit the right task, this means a context switch must have
2157 * happened and that will have taken care of business.
2158 */
2159 raw_spin_lock_irq(&ctx->lock);
2160 task = ctx->task;
2161 /*
2162 * Worse, we cannot even rely on the ctx actually existing anymore. If
2163 * between find_get_context() and perf_install_in_context() the task
2164 * went through perf_event_exit_task() its dead and we should not be
2165 * adding new events.
2166 */
2167 if (task == TASK_TOMBSTONE) {
2168 raw_spin_unlock_irq(&ctx->lock);
2169 return;
2170 }
2171 update_context_time(ctx);
2172 /*
2173 * Update cgrp time only if current cgrp matches event->cgrp.
2174 * Must be done before calling add_event_to_ctx().
2175 */
2176 update_cgrp_time_from_event(event);
2177 add_event_to_ctx(event, ctx);
2178 raw_spin_unlock_irq(&ctx->lock);
2179
2180 if (task)
2181 task_function_call(task, __perf_install_in_context, ctx);
2182 else
2183 cpu_function_call(cpu, __perf_install_in_context, ctx);
2157} 2184}
2158 2185
2159/* 2186/*
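Condensed, the install protocol described in the comments above is 'publish first, kick second': either the IPI lands while the target task is current and __perf_install_in_context() reschedules, or the task has context-switched in the meantime and the scheduler hooks have already picked the event up from the list. As an ordering sketch (time updates elided):

        raw_spin_lock_irq(&ctx->lock);
        task = ctx->task;                       /* read under ctx->lock */
        if (task == TASK_TOMBSTONE) {           /* target task already exited */
                raw_spin_unlock_irq(&ctx->lock);
                return;
        }
        add_event_to_ctx(event, ctx);           /* 1) visible to context switches */
        raw_spin_unlock_irq(&ctx->lock);

        if (task)                               /* 2) best-effort kick */
                task_function_call(task, __perf_install_in_context, ctx);
        else
                cpu_function_call(cpu, __perf_install_in_context, ctx);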
@@ -2180,43 +2207,30 @@ static void __perf_event_mark_enabled(struct perf_event *event)
2180/* 2207/*
2181 * Cross CPU call to enable a performance event 2208 * Cross CPU call to enable a performance event
2182 */ 2209 */
2183static int __perf_event_enable(void *info) 2210static void __perf_event_enable(struct perf_event *event,
2211 struct perf_cpu_context *cpuctx,
2212 struct perf_event_context *ctx,
2213 void *info)
2184{ 2214{
2185 struct perf_event *event = info;
2186 struct perf_event_context *ctx = event->ctx;
2187 struct perf_event *leader = event->group_leader; 2215 struct perf_event *leader = event->group_leader;
2188 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 2216 struct perf_event_context *task_ctx;
2189 int err;
2190 2217
2191 /* 2218 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2192 * There's a time window between 'ctx->is_active' check 2219 event->state <= PERF_EVENT_STATE_ERROR)
2193 * in perf_event_enable function and this place having: 2220 return;
2194 * - IRQs on
2195 * - ctx->lock unlocked
2196 *
2197 * where the task could be killed and 'ctx' deactivated
2198 * by perf_event_exit_task.
2199 */
2200 if (!ctx->is_active)
2201 return -EINVAL;
2202 2221
2203 raw_spin_lock(&ctx->lock);
2204 update_context_time(ctx); 2222 update_context_time(ctx);
2205
2206 if (event->state >= PERF_EVENT_STATE_INACTIVE)
2207 goto unlock;
2208
2209 /*
2210 * set current task's cgroup time reference point
2211 */
2212 perf_cgroup_set_timestamp(current, ctx);
2213
2214 __perf_event_mark_enabled(event); 2223 __perf_event_mark_enabled(event);
2215 2224
2225 if (!ctx->is_active)
2226 return;
2227
2216 if (!event_filter_match(event)) { 2228 if (!event_filter_match(event)) {
2217 if (is_cgroup_event(event)) 2229 if (is_cgroup_event(event)) {
2230 perf_cgroup_set_timestamp(current, ctx); // XXX ?
2218 perf_cgroup_defer_enabled(event); 2231 perf_cgroup_defer_enabled(event);
2219 goto unlock; 2232 }
2233 return;
2220 } 2234 }
2221 2235
2222 /* 2236 /*
@@ -2224,41 +2238,13 @@ static int __perf_event_enable(void *info)
2224 * then don't put it on unless the group is on. 2238 * then don't put it on unless the group is on.
2225 */ 2239 */
2226 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) 2240 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
2227 goto unlock; 2241 return;
2228
2229 if (!group_can_go_on(event, cpuctx, 1)) {
2230 err = -EEXIST;
2231 } else {
2232 if (event == leader)
2233 err = group_sched_in(event, cpuctx, ctx);
2234 else
2235 err = event_sched_in(event, cpuctx, ctx);
2236 }
2237
2238 if (err) {
2239 /*
2240 * If this event can't go on and it's part of a
2241 * group, then the whole group has to come off.
2242 */
2243 if (leader != event) {
2244 group_sched_out(leader, cpuctx, ctx);
2245 perf_mux_hrtimer_restart(cpuctx);
2246 }
2247 if (leader->attr.pinned) {
2248 update_group_times(leader);
2249 leader->state = PERF_EVENT_STATE_ERROR;
2250 }
2251 }
2252 2242
2253unlock: 2243 task_ctx = cpuctx->task_ctx;
2254 raw_spin_unlock(&ctx->lock); 2244 if (ctx->task)
2245 WARN_ON_ONCE(task_ctx != ctx);
2255 2246
2256 return 0; 2247 ctx_resched(cpuctx, task_ctx);
2257}
2258
2259void ___perf_event_enable(void *info)
2260{
2261 __perf_event_mark_enabled((struct perf_event *)info);
2262} 2248}
2263 2249
2264/* 2250/*
@@ -2275,7 +2261,8 @@ static void _perf_event_enable(struct perf_event *event)
2275 struct perf_event_context *ctx = event->ctx; 2261 struct perf_event_context *ctx = event->ctx;
2276 2262
2277 raw_spin_lock_irq(&ctx->lock); 2263 raw_spin_lock_irq(&ctx->lock);
2278 if (event->state >= PERF_EVENT_STATE_INACTIVE) { 2264 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2265 event->state < PERF_EVENT_STATE_ERROR) {
2279 raw_spin_unlock_irq(&ctx->lock); 2266 raw_spin_unlock_irq(&ctx->lock);
2280 return; 2267 return;
2281 } 2268 }
@@ -2291,8 +2278,7 @@ static void _perf_event_enable(struct perf_event *event)
2291 event->state = PERF_EVENT_STATE_OFF; 2278 event->state = PERF_EVENT_STATE_OFF;
2292 raw_spin_unlock_irq(&ctx->lock); 2279 raw_spin_unlock_irq(&ctx->lock);
2293 2280
2294 event_function_call(event, __perf_event_enable, 2281 event_function_call(event, __perf_event_enable, NULL);
2295 ___perf_event_enable, event);
2296} 2282}
2297 2283
2298/* 2284/*
@@ -2342,12 +2328,27 @@ static void ctx_sched_out(struct perf_event_context *ctx,
2342 struct perf_cpu_context *cpuctx, 2328 struct perf_cpu_context *cpuctx,
2343 enum event_type_t event_type) 2329 enum event_type_t event_type)
2344{ 2330{
2345 struct perf_event *event;
2346 int is_active = ctx->is_active; 2331 int is_active = ctx->is_active;
2332 struct perf_event *event;
2347 2333
2348 ctx->is_active &= ~event_type; 2334 lockdep_assert_held(&ctx->lock);
2349 if (likely(!ctx->nr_events)) 2335
2336 if (likely(!ctx->nr_events)) {
2337 /*
2338 * See __perf_remove_from_context().
2339 */
2340 WARN_ON_ONCE(ctx->is_active);
2341 if (ctx->task)
2342 WARN_ON_ONCE(cpuctx->task_ctx);
2350 return; 2343 return;
2344 }
2345
2346 ctx->is_active &= ~event_type;
2347 if (ctx->task) {
2348 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
2349 if (!ctx->is_active)
2350 cpuctx->task_ctx = NULL;
2351 }
2351 2352
2352 update_context_time(ctx); 2353 update_context_time(ctx);
2353 update_cgrp_time_from_cpuctx(cpuctx); 2354 update_cgrp_time_from_cpuctx(cpuctx);
@@ -2518,17 +2519,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
2518 raw_spin_lock(&ctx->lock); 2519 raw_spin_lock(&ctx->lock);
2519 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 2520 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
2520 if (context_equiv(ctx, next_ctx)) { 2521 if (context_equiv(ctx, next_ctx)) {
2521 /* 2522 WRITE_ONCE(ctx->task, next);
2522 * XXX do we need a memory barrier of sorts 2523 WRITE_ONCE(next_ctx->task, task);
2523 * wrt to rcu_dereference() of perf_event_ctxp
2524 */
2525 task->perf_event_ctxp[ctxn] = next_ctx;
2526 next->perf_event_ctxp[ctxn] = ctx;
2527 ctx->task = next;
2528 next_ctx->task = task;
2529 2524
2530 swap(ctx->task_ctx_data, next_ctx->task_ctx_data); 2525 swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
2531 2526
2527 /*
2528 * RCU_INIT_POINTER here is safe because we've not
2529 * modified the ctx and the above modification of
2530 * ctx->task and ctx->task_ctx_data are immaterial
2531 * since those values are always verified under
2532 * ctx->lock which we're now holding.
2533 */
2534 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
2535 RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
2536
2532 do_switch = 0; 2537 do_switch = 0;
2533 2538
2534 perf_event_sync_stat(ctx, next_ctx); 2539 perf_event_sync_stat(ctx, next_ctx);
@@ -2541,8 +2546,7 @@ unlock:
2541 2546
2542 if (do_switch) { 2547 if (do_switch) {
2543 raw_spin_lock(&ctx->lock); 2548 raw_spin_lock(&ctx->lock);
2544 ctx_sched_out(ctx, cpuctx, EVENT_ALL); 2549 task_ctx_sched_out(cpuctx, ctx);
2545 cpuctx->task_ctx = NULL;
2546 raw_spin_unlock(&ctx->lock); 2550 raw_spin_unlock(&ctx->lock);
2547 } 2551 }
2548} 2552}
@@ -2637,10 +2641,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
2637 perf_cgroup_sched_out(task, next); 2641 perf_cgroup_sched_out(task, next);
2638} 2642}
2639 2643
2640static void task_ctx_sched_out(struct perf_event_context *ctx) 2644static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
2645 struct perf_event_context *ctx)
2641{ 2646{
2642 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2643
2644 if (!cpuctx->task_ctx) 2647 if (!cpuctx->task_ctx)
2645 return; 2648 return;
2646 2649
@@ -2648,7 +2651,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx)
2648 return; 2651 return;
2649 2652
2650 ctx_sched_out(ctx, cpuctx, EVENT_ALL); 2653 ctx_sched_out(ctx, cpuctx, EVENT_ALL);
2651 cpuctx->task_ctx = NULL;
2652} 2654}
2653 2655
2654/* 2656/*
@@ -2725,13 +2727,22 @@ ctx_sched_in(struct perf_event_context *ctx,
2725 enum event_type_t event_type, 2727 enum event_type_t event_type,
2726 struct task_struct *task) 2728 struct task_struct *task)
2727{ 2729{
2728 u64 now;
2729 int is_active = ctx->is_active; 2730 int is_active = ctx->is_active;
2731 u64 now;
2732
2733 lockdep_assert_held(&ctx->lock);
2730 2734
2731 ctx->is_active |= event_type;
2732 if (likely(!ctx->nr_events)) 2735 if (likely(!ctx->nr_events))
2733 return; 2736 return;
2734 2737
2738 ctx->is_active |= event_type;
2739 if (ctx->task) {
2740 if (!is_active)
2741 cpuctx->task_ctx = ctx;
2742 else
2743 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
2744 }
2745
2735 now = perf_clock(); 2746 now = perf_clock();
2736 ctx->timestamp = now; 2747 ctx->timestamp = now;
2737 perf_cgroup_set_timestamp(task, ctx); 2748 perf_cgroup_set_timestamp(task, ctx);
@@ -2773,12 +2784,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2773 * cpu flexible, task flexible. 2784 * cpu flexible, task flexible.
2774 */ 2785 */
2775 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2786 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2776 2787 perf_event_sched_in(cpuctx, ctx, task);
2777 if (ctx->nr_events)
2778 cpuctx->task_ctx = ctx;
2779
2780 perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
2781
2782 perf_pmu_enable(ctx->pmu); 2788 perf_pmu_enable(ctx->pmu);
2783 perf_ctx_unlock(cpuctx, ctx); 2789 perf_ctx_unlock(cpuctx, ctx);
2784} 2790}
@@ -2800,6 +2806,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
2800 struct perf_event_context *ctx; 2806 struct perf_event_context *ctx;
2801 int ctxn; 2807 int ctxn;
2802 2808
2809 /*
2810 * If cgroup events exist on this CPU, then we need to check if we have
2811 * to switch in PMU state; cgroup event are system-wide mode only.
2812 *
2813 * Since cgroup events are CPU events, we must schedule these in before
2814 * we schedule in the task events.
2815 */
2816 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
2817 perf_cgroup_sched_in(prev, task);
2818
2803 for_each_task_context_nr(ctxn) { 2819 for_each_task_context_nr(ctxn) {
2804 ctx = task->perf_event_ctxp[ctxn]; 2820 ctx = task->perf_event_ctxp[ctxn];
2805 if (likely(!ctx)) 2821 if (likely(!ctx))
@@ -2807,13 +2823,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
2807 2823
2808 perf_event_context_sched_in(ctx, task); 2824 perf_event_context_sched_in(ctx, task);
2809 } 2825 }
2810 /*
2811 * if cgroup events exist on this CPU, then we need
2812 * to check if we have to switch in PMU state.
2813 * cgroup event are system-wide mode only
2814 */
2815 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
2816 perf_cgroup_sched_in(prev, task);
2817 2826
2818 if (atomic_read(&nr_switch_events)) 2827 if (atomic_read(&nr_switch_events))
2819 perf_event_switch(task, prev, true); 2828 perf_event_switch(task, prev, true);
@@ -3099,46 +3108,30 @@ static int event_enable_on_exec(struct perf_event *event,
3099static void perf_event_enable_on_exec(int ctxn) 3108static void perf_event_enable_on_exec(int ctxn)
3100{ 3109{
3101 struct perf_event_context *ctx, *clone_ctx = NULL; 3110 struct perf_event_context *ctx, *clone_ctx = NULL;
3111 struct perf_cpu_context *cpuctx;
3102 struct perf_event *event; 3112 struct perf_event *event;
3103 unsigned long flags; 3113 unsigned long flags;
3104 int enabled = 0; 3114 int enabled = 0;
3105 int ret;
3106 3115
3107 local_irq_save(flags); 3116 local_irq_save(flags);
3108 ctx = current->perf_event_ctxp[ctxn]; 3117 ctx = current->perf_event_ctxp[ctxn];
3109 if (!ctx || !ctx->nr_events) 3118 if (!ctx || !ctx->nr_events)
3110 goto out; 3119 goto out;
3111 3120
3112 /* 3121 cpuctx = __get_cpu_context(ctx);
3113 * We must ctxsw out cgroup events to avoid conflict 3122 perf_ctx_lock(cpuctx, ctx);
3114 * when invoking perf_task_event_sched_in() later on 3123 list_for_each_entry(event, &ctx->event_list, event_entry)
3115 * in this function. Otherwise we end up trying to 3124 enabled |= event_enable_on_exec(event, ctx);
3116 * ctxswin cgroup events which are already scheduled
3117 * in.
3118 */
3119 perf_cgroup_sched_out(current, NULL);
3120
3121 raw_spin_lock(&ctx->lock);
3122 task_ctx_sched_out(ctx);
3123
3124 list_for_each_entry(event, &ctx->event_list, event_entry) {
3125 ret = event_enable_on_exec(event, ctx);
3126 if (ret)
3127 enabled = 1;
3128 }
3129 3125
3130 /* 3126 /*
3131 * Unclone this context if we enabled any event. 3127 * Unclone and reschedule this context if we enabled any event.
3132 */ 3128 */
3133 if (enabled) 3129 if (enabled) {
3134 clone_ctx = unclone_ctx(ctx); 3130 clone_ctx = unclone_ctx(ctx);
3131 ctx_resched(cpuctx, ctx);
3132 }
3133 perf_ctx_unlock(cpuctx, ctx);
3135 3134
3136 raw_spin_unlock(&ctx->lock);
3137
3138 /*
3139 * Also calls ctxswin for cgroup events, if any:
3140 */
3141 perf_event_context_sched_in(ctx, ctx->task);
3142out: 3135out:
3143 local_irq_restore(flags); 3136 local_irq_restore(flags);
3144 3137
@@ -3334,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
3334 INIT_LIST_HEAD(&ctx->flexible_groups); 3327 INIT_LIST_HEAD(&ctx->flexible_groups);
3335 INIT_LIST_HEAD(&ctx->event_list); 3328 INIT_LIST_HEAD(&ctx->event_list);
3336 atomic_set(&ctx->refcount, 1); 3329 atomic_set(&ctx->refcount, 1);
3337 INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
3338} 3330}
3339 3331
3340static struct perf_event_context * 3332static struct perf_event_context *
@@ -3521,11 +3513,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
3521 3513
3522static void unaccount_event(struct perf_event *event) 3514static void unaccount_event(struct perf_event *event)
3523{ 3515{
3516 bool dec = false;
3517
3524 if (event->parent) 3518 if (event->parent)
3525 return; 3519 return;
3526 3520
3527 if (event->attach_state & PERF_ATTACH_TASK) 3521 if (event->attach_state & PERF_ATTACH_TASK)
3528 static_key_slow_dec_deferred(&perf_sched_events); 3522 dec = true;
3529 if (event->attr.mmap || event->attr.mmap_data) 3523 if (event->attr.mmap || event->attr.mmap_data)
3530 atomic_dec(&nr_mmap_events); 3524 atomic_dec(&nr_mmap_events);
3531 if (event->attr.comm) 3525 if (event->attr.comm)
@@ -3535,12 +3529,15 @@ static void unaccount_event(struct perf_event *event)
3535 if (event->attr.freq) 3529 if (event->attr.freq)
3536 atomic_dec(&nr_freq_events); 3530 atomic_dec(&nr_freq_events);
3537 if (event->attr.context_switch) { 3531 if (event->attr.context_switch) {
3538 static_key_slow_dec_deferred(&perf_sched_events); 3532 dec = true;
3539 atomic_dec(&nr_switch_events); 3533 atomic_dec(&nr_switch_events);
3540 } 3534 }
3541 if (is_cgroup_event(event)) 3535 if (is_cgroup_event(event))
3542 static_key_slow_dec_deferred(&perf_sched_events); 3536 dec = true;
3543 if (has_branch_stack(event)) 3537 if (has_branch_stack(event))
3538 dec = true;
3539
3540 if (dec)
3544 static_key_slow_dec_deferred(&perf_sched_events); 3541 static_key_slow_dec_deferred(&perf_sched_events);
3545 3542
3546 unaccount_event_cpu(event, event->cpu); 3543 unaccount_event_cpu(event, event->cpu);
@@ -3556,7 +3553,7 @@ static void unaccount_event(struct perf_event *event)
3556 * 3) two matching events on the same context. 3553 * 3) two matching events on the same context.
3557 * 3554 *
3558 * The former two cases are handled in the allocation path (perf_event_alloc(), 3555 * The former two cases are handled in the allocation path (perf_event_alloc(),
3559 * __free_event()), the latter -- before the first perf_install_in_context(). 3556 * _free_event()), the latter -- before the first perf_install_in_context().
3560 */ 3557 */
3561static int exclusive_event_init(struct perf_event *event) 3558static int exclusive_event_init(struct perf_event *event)
3562{ 3559{
@@ -3631,29 +3628,6 @@ static bool exclusive_event_installable(struct perf_event *event,
3631 return true; 3628 return true;
3632} 3629}
3633 3630
3634static void __free_event(struct perf_event *event)
3635{
3636 if (!event->parent) {
3637 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
3638 put_callchain_buffers();
3639 }
3640
3641 perf_event_free_bpf_prog(event);
3642
3643 if (event->destroy)
3644 event->destroy(event);
3645
3646 if (event->ctx)
3647 put_ctx(event->ctx);
3648
3649 if (event->pmu) {
3650 exclusive_event_destroy(event);
3651 module_put(event->pmu->module);
3652 }
3653
3654 call_rcu(&event->rcu_head, free_event_rcu);
3655}
3656
3657static void _free_event(struct perf_event *event) 3631static void _free_event(struct perf_event *event)
3658{ 3632{
3659 irq_work_sync(&event->pending); 3633 irq_work_sync(&event->pending);
@@ -3675,7 +3649,25 @@ static void _free_event(struct perf_event *event)
3675 if (is_cgroup_event(event)) 3649 if (is_cgroup_event(event))
3676 perf_detach_cgroup(event); 3650 perf_detach_cgroup(event);
3677 3651
3678 __free_event(event); 3652 if (!event->parent) {
3653 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
3654 put_callchain_buffers();
3655 }
3656
3657 perf_event_free_bpf_prog(event);
3658
3659 if (event->destroy)
3660 event->destroy(event);
3661
3662 if (event->ctx)
3663 put_ctx(event->ctx);
3664
3665 if (event->pmu) {
3666 exclusive_event_destroy(event);
3667 module_put(event->pmu->module);
3668 }
3669
3670 call_rcu(&event->rcu_head, free_event_rcu);
3679} 3671}
3680 3672
3681/* 3673/*
@@ -3702,14 +3694,13 @@ static void perf_remove_from_owner(struct perf_event *event)
3702 struct task_struct *owner; 3694 struct task_struct *owner;
3703 3695
3704 rcu_read_lock(); 3696 rcu_read_lock();
3705 owner = ACCESS_ONCE(event->owner);
3706 /* 3697 /*
3707 * Matches the smp_wmb() in perf_event_exit_task(). If we observe 3698 * Matches the smp_store_release() in perf_event_exit_task(). If we
3708 * !owner it means the list deletion is complete and we can indeed 3699 * observe !owner it means the list deletion is complete and we can
3709 * free this event, otherwise we need to serialize on 3700 * indeed free this event, otherwise we need to serialize on
3710 * owner->perf_event_mutex. 3701 * owner->perf_event_mutex.
3711 */ 3702 */
3712 smp_read_barrier_depends(); 3703 owner = lockless_dereference(event->owner);
3713 if (owner) { 3704 if (owner) {
3714 /* 3705 /*
3715 * Since delayed_put_task_struct() also drops the last 3706 * Since delayed_put_task_struct() also drops the last
@@ -3737,8 +3728,10 @@ static void perf_remove_from_owner(struct perf_event *event)
3737 * ensured they're done, and we can proceed with freeing the 3728 * ensured they're done, and we can proceed with freeing the
3738 * event. 3729 * event.
3739 */ 3730 */
3740 if (event->owner) 3731 if (event->owner) {
3741 list_del_init(&event->owner_entry); 3732 list_del_init(&event->owner_entry);
3733 smp_store_release(&event->owner, NULL);
3734 }
3742 mutex_unlock(&owner->perf_event_mutex); 3735 mutex_unlock(&owner->perf_event_mutex);
3743 put_task_struct(owner); 3736 put_task_struct(owner);
3744 } 3737 }
@@ -3746,36 +3739,98 @@ static void perf_remove_from_owner(struct perf_event *event)
3746 3739
3747static void put_event(struct perf_event *event) 3740static void put_event(struct perf_event *event)
3748{ 3741{
3749 struct perf_event_context *ctx;
3750
3751 if (!atomic_long_dec_and_test(&event->refcount)) 3742 if (!atomic_long_dec_and_test(&event->refcount))
3752 return; 3743 return;
3753 3744
3745 _free_event(event);
3746}
3747
3748/*
3749 * Kill an event dead; while event:refcount will preserve the event
3750 * object, it will not preserve its functionality. Once the last 'user'
3751 * gives up the object, we'll destroy the thing.
3752 */
3753int perf_event_release_kernel(struct perf_event *event)
3754{
3755 struct perf_event_context *ctx;
3756 struct perf_event *child, *tmp;
3757
3754 if (!is_kernel_event(event)) 3758 if (!is_kernel_event(event))
3755 perf_remove_from_owner(event); 3759 perf_remove_from_owner(event);
3756 3760
3761 ctx = perf_event_ctx_lock(event);
3762 WARN_ON_ONCE(ctx->parent_ctx);
3763 perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
3764 perf_event_ctx_unlock(event, ctx);
3765
3757 /* 3766 /*
3758 * There are two ways this annotation is useful: 3767 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
3768 * either from the above perf_remove_from_context() or through
3769 * perf_event_exit_event().
3759 * 3770 *
3760 * 1) there is a lock recursion from perf_event_exit_task 3771 * Therefore, anybody acquiring event->child_mutex after the below
3761 * see the comment there. 3772 * loop _must_ also see this, most importantly inherit_event() which
3773 * will avoid placing more children on the list.
3762 * 3774 *
3763 * 2) there is a lock-inversion with mmap_sem through 3775 * Thus this guarantees that we will in fact observe and kill _ALL_
3764 * perf_read_group(), which takes faults while 3776 * child events.
3765 * holding ctx->mutex, however this is called after
3766 * the last filedesc died, so there is no possibility
3767 * to trigger the AB-BA case.
3768 */ 3777 */
3769 ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING); 3778 WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
3770 WARN_ON_ONCE(ctx->parent_ctx);
3771 perf_remove_from_context(event, true);
3772 perf_event_ctx_unlock(event, ctx);
3773 3779
3774 _free_event(event); 3780again:
3775} 3781 mutex_lock(&event->child_mutex);
3782 list_for_each_entry(child, &event->child_list, child_list) {
3776 3783
3777int perf_event_release_kernel(struct perf_event *event) 3784 /*
3778{ 3785 * Cannot change, child events are not migrated, see the
3786 * comment with perf_event_ctx_lock_nested().
3787 */
3788 ctx = lockless_dereference(child->ctx);
3789 /*
3790 * Since child_mutex nests inside ctx::mutex, we must jump
3791 * through hoops. We start by grabbing a reference on the ctx.
3792 *
3793 * Since the event cannot get freed while we hold the
3794 * child_mutex, the context must also exist and have a !0
3795 * reference count.
3796 */
3797 get_ctx(ctx);
3798
3799 /*
3800 * Now that we have a ctx ref, we can drop child_mutex, and
3801 * acquire ctx::mutex without fear of it going away. Then we
3802 * can re-acquire child_mutex.
3803 */
3804 mutex_unlock(&event->child_mutex);
3805 mutex_lock(&ctx->mutex);
3806 mutex_lock(&event->child_mutex);
3807
3808 /*
3809 * Now that we hold ctx::mutex and child_mutex, revalidate our
3810 * state, if child is still the first entry, it didn't get freed
3811 * and we can continue doing so.
3812 */
3813 tmp = list_first_entry_or_null(&event->child_list,
3814 struct perf_event, child_list);
3815 if (tmp == child) {
3816 perf_remove_from_context(child, DETACH_GROUP);
3817 list_del(&child->child_list);
3818 free_event(child);
3819 /*
3820 * This matches the refcount bump in inherit_event();
3821 * this can't be the last reference.
3822 */
3823 put_event(event);
3824 }
3825
3826 mutex_unlock(&event->child_mutex);
3827 mutex_unlock(&ctx->mutex);
3828 put_ctx(ctx);
3829 goto again;
3830 }
3831 mutex_unlock(&event->child_mutex);
3832
3833 /* Must be the last reference */
3779 put_event(event); 3834 put_event(event);
3780 return 0; 3835 return 0;
3781} 3836}
@@ -3786,46 +3841,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
3786 */ 3841 */
3787static int perf_release(struct inode *inode, struct file *file) 3842static int perf_release(struct inode *inode, struct file *file)
3788{ 3843{
3789 put_event(file->private_data); 3844 perf_event_release_kernel(file->private_data);
3790 return 0; 3845 return 0;
3791} 3846}
3792 3847
3793/*
3794 * Remove all orphaned events from the context.
3795 */
3796static void orphans_remove_work(struct work_struct *work)
3797{
3798 struct perf_event_context *ctx;
3799 struct perf_event *event, *tmp;
3800
3801 ctx = container_of(work, struct perf_event_context,
3802 orphans_remove.work);
3803
3804 mutex_lock(&ctx->mutex);
3805 list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
3806 struct perf_event *parent_event = event->parent;
3807
3808 if (!is_orphaned_child(event))
3809 continue;
3810
3811 perf_remove_from_context(event, true);
3812
3813 mutex_lock(&parent_event->child_mutex);
3814 list_del_init(&event->child_list);
3815 mutex_unlock(&parent_event->child_mutex);
3816
3817 free_event(event);
3818 put_event(parent_event);
3819 }
3820
3821 raw_spin_lock_irq(&ctx->lock);
3822 ctx->orphans_remove_sched = false;
3823 raw_spin_unlock_irq(&ctx->lock);
3824 mutex_unlock(&ctx->mutex);
3825
3826 put_ctx(ctx);
3827}
3828
3829u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) 3848u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
3830{ 3849{
3831 struct perf_event *child; 3850 struct perf_event *child;
@@ -4054,7 +4073,7 @@ static void _perf_event_reset(struct perf_event *event)
4054/* 4073/*
4055 * Holding the top-level event's child_mutex means that any 4074 * Holding the top-level event's child_mutex means that any
4056 * descendant process that has inherited this event will block 4075 * descendant process that has inherited this event will block
4057 * in sync_child_event if it goes to exit, thus satisfying the 4076 * in perf_event_exit_event() if it goes to exit, thus satisfying the
4058 * task existence requirements of perf_event_enable/disable. 4077 * task existence requirements of perf_event_enable/disable.
4059 */ 4078 */
4060static void perf_event_for_each_child(struct perf_event *event, 4079static void perf_event_for_each_child(struct perf_event *event,
@@ -4086,36 +4105,14 @@ static void perf_event_for_each(struct perf_event *event,
4086 perf_event_for_each_child(sibling, func); 4105 perf_event_for_each_child(sibling, func);
4087} 4106}
4088 4107
4089struct period_event { 4108static void __perf_event_period(struct perf_event *event,
4090 struct perf_event *event; 4109 struct perf_cpu_context *cpuctx,
4091 u64 value; 4110 struct perf_event_context *ctx,
4092}; 4111 void *info)
4093
4094static void ___perf_event_period(void *info)
4095{
4096 struct period_event *pe = info;
4097 struct perf_event *event = pe->event;
4098 u64 value = pe->value;
4099
4100 if (event->attr.freq) {
4101 event->attr.sample_freq = value;
4102 } else {
4103 event->attr.sample_period = value;
4104 event->hw.sample_period = value;
4105 }
4106
4107 local64_set(&event->hw.period_left, 0);
4108}
4109
4110static int __perf_event_period(void *info)
4111{ 4112{
4112 struct period_event *pe = info; 4113 u64 value = *((u64 *)info);
4113 struct perf_event *event = pe->event;
4114 struct perf_event_context *ctx = event->ctx;
4115 u64 value = pe->value;
4116 bool active; 4114 bool active;
4117 4115
4118 raw_spin_lock(&ctx->lock);
4119 if (event->attr.freq) { 4116 if (event->attr.freq) {
4120 event->attr.sample_freq = value; 4117 event->attr.sample_freq = value;
4121 } else { 4118 } else {
@@ -4135,14 +4132,10 @@ static int __perf_event_period(void *info)
4135 event->pmu->start(event, PERF_EF_RELOAD); 4132 event->pmu->start(event, PERF_EF_RELOAD);
4136 perf_pmu_enable(ctx->pmu); 4133 perf_pmu_enable(ctx->pmu);
4137 } 4134 }
4138 raw_spin_unlock(&ctx->lock);
4139
4140 return 0;
4141} 4135}
4142 4136
4143static int perf_event_period(struct perf_event *event, u64 __user *arg) 4137static int perf_event_period(struct perf_event *event, u64 __user *arg)
4144{ 4138{
4145 struct period_event pe = { .event = event, };
4146 u64 value; 4139 u64 value;
4147 4140
4148 if (!is_sampling_event(event)) 4141 if (!is_sampling_event(event))
@@ -4157,10 +4150,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
4157 if (event->attr.freq && value > sysctl_perf_event_sample_rate) 4150 if (event->attr.freq && value > sysctl_perf_event_sample_rate)
4158 return -EINVAL; 4151 return -EINVAL;
4159 4152
4160 pe.value = value; 4153 event_function_call(event, __perf_event_period, &value);
4161
4162 event_function_call(event, __perf_event_period,
4163 ___perf_event_period, &pe);
4164 4154
4165 return 0; 4155 return 0;
4166} 4156}
@@ -4932,7 +4922,7 @@ static void perf_pending_event(struct irq_work *entry)
4932 4922
4933 if (event->pending_disable) { 4923 if (event->pending_disable) {
4934 event->pending_disable = 0; 4924 event->pending_disable = 0;
4935 __perf_event_disable(event); 4925 perf_event_disable_local(event);
4936 } 4926 }
4937 4927
4938 if (event->pending_wakeup) { 4928 if (event->pending_wakeup) {
@@ -7753,11 +7743,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
7753 7743
7754static void account_event(struct perf_event *event) 7744static void account_event(struct perf_event *event)
7755{ 7745{
7746 bool inc = false;
7747
7756 if (event->parent) 7748 if (event->parent)
7757 return; 7749 return;
7758 7750
7759 if (event->attach_state & PERF_ATTACH_TASK) 7751 if (event->attach_state & PERF_ATTACH_TASK)
7760 static_key_slow_inc(&perf_sched_events.key); 7752 inc = true;
7761 if (event->attr.mmap || event->attr.mmap_data) 7753 if (event->attr.mmap || event->attr.mmap_data)
7762 atomic_inc(&nr_mmap_events); 7754 atomic_inc(&nr_mmap_events);
7763 if (event->attr.comm) 7755 if (event->attr.comm)
@@ -7770,11 +7762,14 @@ static void account_event(struct perf_event *event)
7770 } 7762 }
7771 if (event->attr.context_switch) { 7763 if (event->attr.context_switch) {
7772 atomic_inc(&nr_switch_events); 7764 atomic_inc(&nr_switch_events);
7773 static_key_slow_inc(&perf_sched_events.key); 7765 inc = true;
7774 } 7766 }
7775 if (has_branch_stack(event)) 7767 if (has_branch_stack(event))
7776 static_key_slow_inc(&perf_sched_events.key); 7768 inc = true;
7777 if (is_cgroup_event(event)) 7769 if (is_cgroup_event(event))
7770 inc = true;
7771
7772 if (inc)
7778 static_key_slow_inc(&perf_sched_events.key); 7773 static_key_slow_inc(&perf_sched_events.key);
7779 7774
7780 account_event_cpu(event, event->cpu); 7775 account_event_cpu(event, event->cpu);
@@ -8422,11 +8417,11 @@ SYSCALL_DEFINE5(perf_event_open,
8422 * See perf_event_ctx_lock() for comments on the details 8417 * See perf_event_ctx_lock() for comments on the details
8423 * of swizzling perf_event::ctx. 8418 * of swizzling perf_event::ctx.
8424 */ 8419 */
8425 perf_remove_from_context(group_leader, false); 8420 perf_remove_from_context(group_leader, 0);
8426 8421
8427 list_for_each_entry(sibling, &group_leader->sibling_list, 8422 list_for_each_entry(sibling, &group_leader->sibling_list,
8428 group_entry) { 8423 group_entry) {
8429 perf_remove_from_context(sibling, false); 8424 perf_remove_from_context(sibling, 0);
8430 put_ctx(gctx); 8425 put_ctx(gctx);
8431 } 8426 }
8432 8427
@@ -8479,6 +8474,8 @@ SYSCALL_DEFINE5(perf_event_open,
8479 perf_event__header_size(event); 8474 perf_event__header_size(event);
8480 perf_event__id_header_size(event); 8475 perf_event__id_header_size(event);
8481 8476
8477 event->owner = current;
8478
8482 perf_install_in_context(ctx, event, event->cpu); 8479 perf_install_in_context(ctx, event, event->cpu);
8483 perf_unpin_context(ctx); 8480 perf_unpin_context(ctx);
8484 8481
@@ -8488,8 +8485,6 @@ SYSCALL_DEFINE5(perf_event_open,
8488 8485
8489 put_online_cpus(); 8486 put_online_cpus();
8490 8487
8491 event->owner = current;
8492
8493 mutex_lock(&current->perf_event_mutex); 8488 mutex_lock(&current->perf_event_mutex);
8494 list_add_tail(&event->owner_entry, &current->perf_event_list); 8489 list_add_tail(&event->owner_entry, &current->perf_event_list);
8495 mutex_unlock(&current->perf_event_mutex); 8490 mutex_unlock(&current->perf_event_mutex);
@@ -8556,7 +8551,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
8556 } 8551 }
8557 8552
8558 /* Mark owner so we can distinguish it from user events. */ 8553 /* Mark owner so we can distinguish it from user events. */
8559 event->owner = EVENT_OWNER_KERNEL; 8554 event->owner = TASK_TOMBSTONE;
8560 8555
8561 account_event(event); 8556 account_event(event);
8562 8557
@@ -8606,7 +8601,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
8606 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); 8601 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
8607 list_for_each_entry_safe(event, tmp, &src_ctx->event_list, 8602 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
8608 event_entry) { 8603 event_entry) {
8609 perf_remove_from_context(event, false); 8604 perf_remove_from_context(event, 0);
8610 unaccount_event_cpu(event, src_cpu); 8605 unaccount_event_cpu(event, src_cpu);
8611 put_ctx(src_ctx); 8606 put_ctx(src_ctx);
8612 list_add(&event->migrate_entry, &events); 8607 list_add(&event->migrate_entry, &events);
@@ -8673,33 +8668,15 @@ static void sync_child_event(struct perf_event *child_event,
8673 &parent_event->child_total_time_enabled); 8668 &parent_event->child_total_time_enabled);
8674 atomic64_add(child_event->total_time_running, 8669 atomic64_add(child_event->total_time_running,
8675 &parent_event->child_total_time_running); 8670 &parent_event->child_total_time_running);
8676
8677 /*
8678 * Remove this event from the parent's list
8679 */
8680 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
8681 mutex_lock(&parent_event->child_mutex);
8682 list_del_init(&child_event->child_list);
8683 mutex_unlock(&parent_event->child_mutex);
8684
8685 /*
8686 * Make sure user/parent get notified, that we just
8687 * lost one event.
8688 */
8689 perf_event_wakeup(parent_event);
8690
8691 /*
8692 * Release the parent event, if this was the last
8693 * reference to it.
8694 */
8695 put_event(parent_event);
8696} 8671}
8697 8672
8698static void 8673static void
8699__perf_event_exit_task(struct perf_event *child_event, 8674perf_event_exit_event(struct perf_event *child_event,
8700 struct perf_event_context *child_ctx, 8675 struct perf_event_context *child_ctx,
8701 struct task_struct *child) 8676 struct task_struct *child)
8702{ 8677{
8678 struct perf_event *parent_event = child_event->parent;
8679
8703 /* 8680 /*
8704 * Do not destroy the 'original' grouping; because of the context 8681 * Do not destroy the 'original' grouping; because of the context
8705 * switch optimization the original events could've ended up in a 8682 * switch optimization the original events could've ended up in a
@@ -8712,57 +8689,86 @@ __perf_event_exit_task(struct perf_event *child_event,
8712 * Do destroy all inherited groups, we don't care about those 8689 * Do destroy all inherited groups, we don't care about those
8713 * and being thorough is better. 8690 * and being thorough is better.
8714 */ 8691 */
8715 perf_remove_from_context(child_event, !!child_event->parent); 8692 raw_spin_lock_irq(&child_ctx->lock);
8693 WARN_ON_ONCE(child_ctx->is_active);
8694
8695 if (parent_event)
8696 perf_group_detach(child_event);
8697 list_del_event(child_event, child_ctx);
8698 child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
8699 raw_spin_unlock_irq(&child_ctx->lock);
8716 8700
8717 /* 8701 /*
8718 * It can happen that the parent exits first, and has events 8702 * Parent events are governed by their filedesc, retain them.
8719 * that are still around due to the child reference. These
8720 * events need to be zapped.
8721 */ 8703 */
8722 if (child_event->parent) { 8704 if (!parent_event) {
8723 sync_child_event(child_event, child);
8724 free_event(child_event);
8725 } else {
8726 child_event->state = PERF_EVENT_STATE_EXIT;
8727 perf_event_wakeup(child_event); 8705 perf_event_wakeup(child_event);
8706 return;
8728 } 8707 }
8708 /*
8709 * Child events can be cleaned up.
8710 */
8711
8712 sync_child_event(child_event, child);
8713
8714 /*
8715 * Remove this event from the parent's list
8716 */
8717 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
8718 mutex_lock(&parent_event->child_mutex);
8719 list_del_init(&child_event->child_list);
8720 mutex_unlock(&parent_event->child_mutex);
8721
8722 /*
8723 * Kick perf_poll() for is_event_hup().
8724 */
8725 perf_event_wakeup(parent_event);
8726 free_event(child_event);
8727 put_event(parent_event);
8729} 8728}
8730 8729
8731static void perf_event_exit_task_context(struct task_struct *child, int ctxn) 8730static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
8732{ 8731{
8733 struct perf_event *child_event, *next;
8734 struct perf_event_context *child_ctx, *clone_ctx = NULL; 8732 struct perf_event_context *child_ctx, *clone_ctx = NULL;
8735 unsigned long flags; 8733 struct perf_event *child_event, *next;
8734
8735 WARN_ON_ONCE(child != current);
8736 8736
8737 if (likely(!child->perf_event_ctxp[ctxn])) 8737 child_ctx = perf_pin_task_context(child, ctxn);
8738 if (!child_ctx)
8738 return; 8739 return;
8739 8740
8740 local_irq_save(flags);
8741 /* 8741 /*
8742 * We can't reschedule here because interrupts are disabled, 8742 * In order to reduce the amount of trickiness in ctx tear-down, we hold
8743 * and either child is current or it is a task that can't be 8743 * ctx::mutex over the entire thing. This serializes against almost
8744 * scheduled, so we are now safe from rescheduling changing 8744 * everything that wants to access the ctx.
8745 * our context. 8745 *
8746 * The exception is sys_perf_event_open() /
8747 * perf_event_create_kernel_counter() which does find_get_context()
8748 * without ctx::mutex (it cannot because of the move_group double mutex
8749 * lock thing). See the comments in perf_install_in_context().
8746 */ 8750 */
8747 child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); 8751 mutex_lock(&child_ctx->mutex);
8748 8752
8749 /* 8753 /*
8750 * Take the context lock here so that if find_get_context is 8754 * In a single ctx::lock section, de-schedule the events and detach the
8751 * reading child->perf_event_ctxp, we wait until it has 8755 * context from the task such that we cannot ever get it scheduled back
8752 * incremented the context's refcount before we do put_ctx below. 8756 * in.
8753 */ 8757 */
8754 raw_spin_lock(&child_ctx->lock); 8758 raw_spin_lock_irq(&child_ctx->lock);
8755 task_ctx_sched_out(child_ctx); 8759 task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
8756 child->perf_event_ctxp[ctxn] = NULL;
8757 8760
8758 /* 8761 /*
8759 * If this context is a clone; unclone it so it can't get 8762 * Now that the context is inactive, destroy the task <-> ctx relation
8760 * swapped to another process while we're removing all 8763 * and mark the context dead.
8761 * the events from it.
8762 */ 8764 */
8765 RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
8766 put_ctx(child_ctx); /* cannot be last */
8767 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
8768 put_task_struct(current); /* cannot be last */
8769
8763 clone_ctx = unclone_ctx(child_ctx); 8770 clone_ctx = unclone_ctx(child_ctx);
8764 update_context_time(child_ctx); 8771 raw_spin_unlock_irq(&child_ctx->lock);
8765 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
8766 8772
8767 if (clone_ctx) 8773 if (clone_ctx)
8768 put_ctx(clone_ctx); 8774 put_ctx(clone_ctx);
@@ -8774,20 +8780,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
8774 */ 8780 */
8775 perf_event_task(child, child_ctx, 0); 8781 perf_event_task(child, child_ctx, 0);
8776 8782
8777 /*
8778 * We can recurse on the same lock type through:
8779 *
8780 * __perf_event_exit_task()
8781 * sync_child_event()
8782 * put_event()
8783 * mutex_lock(&ctx->mutex)
8784 *
8785 * But since its the parent context it won't be the same instance.
8786 */
8787 mutex_lock(&child_ctx->mutex);
8788
8789 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) 8783 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
8790 __perf_event_exit_task(child_event, child_ctx, child); 8784 perf_event_exit_event(child_event, child_ctx, child);
8791 8785
8792 mutex_unlock(&child_ctx->mutex); 8786 mutex_unlock(&child_ctx->mutex);
8793 8787
@@ -8812,8 +8806,7 @@ void perf_event_exit_task(struct task_struct *child)
8812 * the owner, closes a race against perf_release() where 8806 * the owner, closes a race against perf_release() where
8813 * we need to serialize on the owner->perf_event_mutex. 8807 * we need to serialize on the owner->perf_event_mutex.
8814 */ 8808 */
8815 smp_wmb(); 8809 smp_store_release(&event->owner, NULL);
8816 event->owner = NULL;
8817 } 8810 }
8818 mutex_unlock(&child->perf_event_mutex); 8811 mutex_unlock(&child->perf_event_mutex);
8819 8812
@@ -8896,21 +8889,20 @@ void perf_event_delayed_put(struct task_struct *task)
8896 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); 8889 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
8897} 8890}
8898 8891
8899struct perf_event *perf_event_get(unsigned int fd) 8892struct file *perf_event_get(unsigned int fd)
8900{ 8893{
8901 int err; 8894 struct file *file;
8902 struct fd f;
8903 struct perf_event *event;
8904 8895
8905 err = perf_fget_light(fd, &f); 8896 file = fget_raw(fd);
8906 if (err) 8897 if (!file)
8907 return ERR_PTR(err); 8898 return ERR_PTR(-EBADF);
8908 8899
8909 event = f.file->private_data; 8900 if (file->f_op != &perf_fops) {
8910 atomic_long_inc(&event->refcount); 8901 fput(file);
8911 fdput(f); 8902 return ERR_PTR(-EBADF);
8903 }
8912 8904
8913 return event; 8905 return file;
8914} 8906}
8915 8907
8916const struct perf_event_attr *perf_event_attrs(struct perf_event *event) 8908const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
@@ -8953,8 +8945,16 @@ inherit_event(struct perf_event *parent_event,
8953 if (IS_ERR(child_event)) 8945 if (IS_ERR(child_event))
8954 return child_event; 8946 return child_event;
8955 8947
8948 /*
8949 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
8950 * must be under the same lock in order to serialize against
8951 * perf_event_release_kernel(), such that either we must observe
8952 * is_orphaned_event() or they will observe us on the child_list.
8953 */
8954 mutex_lock(&parent_event->child_mutex);
8956 if (is_orphaned_event(parent_event) || 8955 if (is_orphaned_event(parent_event) ||
8957 !atomic_long_inc_not_zero(&parent_event->refcount)) { 8956 !atomic_long_inc_not_zero(&parent_event->refcount)) {
8957 mutex_unlock(&parent_event->child_mutex);
8958 free_event(child_event); 8958 free_event(child_event);
8959 return NULL; 8959 return NULL;
8960 } 8960 }
@@ -9002,8 +9002,6 @@ inherit_event(struct perf_event *parent_event,
9002 /* 9002 /*
9003 * Link this into the parent event's child list 9003 * Link this into the parent event's child list
9004 */ 9004 */
9005 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
9006 mutex_lock(&parent_event->child_mutex);
9007 list_add_tail(&child_event->child_list, &parent_event->child_list); 9005 list_add_tail(&child_event->child_list, &parent_event->child_list);
9008 mutex_unlock(&parent_event->child_mutex); 9006 mutex_unlock(&parent_event->child_mutex);
9009 9007
@@ -9221,13 +9219,14 @@ static void perf_event_init_cpu(int cpu)
9221#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE 9219#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
9222static void __perf_event_exit_context(void *__info) 9220static void __perf_event_exit_context(void *__info)
9223{ 9221{
9224 struct remove_event re = { .detach_group = true };
9225 struct perf_event_context *ctx = __info; 9222 struct perf_event_context *ctx = __info;
9223 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
9224 struct perf_event *event;
9226 9225
9227 rcu_read_lock(); 9226 raw_spin_lock(&ctx->lock);
9228 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) 9227 list_for_each_entry(event, &ctx->event_list, event_entry)
9229 __perf_remove_from_context(&re); 9228 __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
9230 rcu_read_unlock(); 9229 raw_spin_unlock(&ctx->lock);
9231} 9230}
9232 9231
9233static void perf_event_exit_cpu_context(int cpu) 9232static void perf_event_exit_cpu_context(int cpu)
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 92ce5f4ccc26..3f8cb1e14588 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
444 * current task. 444 * current task.
445 */ 445 */
446 if (irqs_disabled() && bp->ctx && bp->ctx->task == current) 446 if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
447 __perf_event_disable(bp); 447 perf_event_disable_local(bp);
448 else 448 else
449 perf_event_disable(bp); 449 perf_event_disable(bp);
450 450
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index adfdc0536117..1faad2cfdb9e 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -459,6 +459,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
459 __free_page(page); 459 __free_page(page);
460} 460}
461 461
462static void __rb_free_aux(struct ring_buffer *rb)
463{
464 int pg;
465
466 if (rb->aux_priv) {
467 rb->free_aux(rb->aux_priv);
468 rb->free_aux = NULL;
469 rb->aux_priv = NULL;
470 }
471
472 if (rb->aux_nr_pages) {
473 for (pg = 0; pg < rb->aux_nr_pages; pg++)
474 rb_free_aux_page(rb, pg);
475
476 kfree(rb->aux_pages);
477 rb->aux_nr_pages = 0;
478 }
479}
480
462int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, 481int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
463 pgoff_t pgoff, int nr_pages, long watermark, int flags) 482 pgoff_t pgoff, int nr_pages, long watermark, int flags)
464{ 483{
@@ -547,30 +566,11 @@ out:
547 if (!ret) 566 if (!ret)
548 rb->aux_pgoff = pgoff; 567 rb->aux_pgoff = pgoff;
549 else 568 else
550 rb_free_aux(rb); 569 __rb_free_aux(rb);
551 570
552 return ret; 571 return ret;
553} 572}
554 573
555static void __rb_free_aux(struct ring_buffer *rb)
556{
557 int pg;
558
559 if (rb->aux_priv) {
560 rb->free_aux(rb->aux_priv);
561 rb->free_aux = NULL;
562 rb->aux_priv = NULL;
563 }
564
565 if (rb->aux_nr_pages) {
566 for (pg = 0; pg < rb->aux_nr_pages; pg++)
567 rb_free_aux_page(rb, pg);
568
569 kfree(rb->aux_pages);
570 rb->aux_nr_pages = 0;
571 }
572}
573
574void rb_free_aux(struct ring_buffer *rb) 574void rb_free_aux(struct ring_buffer *rb)
575{ 575{
576 if (atomic_dec_and_test(&rb->aux_refcount)) 576 if (atomic_dec_and_test(&rb->aux_refcount))
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 45dd798bcd37..326a75e884db 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
191 struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; 191 struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
192 struct bpf_array *array = container_of(map, struct bpf_array, map); 192 struct bpf_array *array = container_of(map, struct bpf_array, map);
193 struct perf_event *event; 193 struct perf_event *event;
194 struct file *file;
194 195
195 if (unlikely(index >= array->map.max_entries)) 196 if (unlikely(index >= array->map.max_entries))
196 return -E2BIG; 197 return -E2BIG;
197 198
198 event = (struct perf_event *)array->ptrs[index]; 199 file = (struct file *)array->ptrs[index];
199 if (!event) 200 if (unlikely(!file))
200 return -ENOENT; 201 return -ENOENT;
201 202
203 event = file->private_data;
204
202 /* make sure event is local and doesn't have pmu::count */ 205 /* make sure event is local and doesn't have pmu::count */
203 if (event->oncpu != smp_processor_id() || 206 if (event->oncpu != smp_processor_id() ||
204 event->pmu->count) 207 event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
228 void *data = (void *) (long) r4; 231 void *data = (void *) (long) r4;
229 struct perf_sample_data sample_data; 232 struct perf_sample_data sample_data;
230 struct perf_event *event; 233 struct perf_event *event;
234 struct file *file;
231 struct perf_raw_record raw = { 235 struct perf_raw_record raw = {
232 .size = size, 236 .size = size,
233 .data = data, 237 .data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
236 if (unlikely(index >= array->map.max_entries)) 240 if (unlikely(index >= array->map.max_entries))
237 return -E2BIG; 241 return -E2BIG;
238 242
239 event = (struct perf_event *)array->ptrs[index]; 243 file = (struct file *)array->ptrs[index];
240 if (unlikely(!event)) 244 if (unlikely(!file))
241 return -ENOENT; 245 return -ENOENT;
242 246
247 event = file->private_data;
248
243 if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || 249 if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
244 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) 250 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
245 return -EINVAL; 251 return -EINVAL;
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0a22407e1d7d..5d34815c7ccb 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -77,6 +77,9 @@ include config/utilities.mak
77# Define NO_AUXTRACE if you do not want AUX area tracing support 77# Define NO_AUXTRACE if you do not want AUX area tracing support
78# 78#
79# Define NO_LIBBPF if you do not want BPF support 79# Define NO_LIBBPF if you do not want BPF support
80#
81# Define FEATURES_DUMP to provide a feature detection dump file
82# and bypass the feature detection
80 83
81# As per kernel Makefile, avoid funny character set dependencies 84# As per kernel Makefile, avoid funny character set dependencies
82unexport LC_ALL 85unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
166include config/Makefile 169include config/Makefile
167endif 170endif
168 171
172# The FEATURE_DUMP_EXPORT variable holds the location of the actual
173# FEATURE_DUMP file to be used to bypass feature detection
174# (for bpf or any other subproject)
175ifeq ($(FEATURES_DUMP),)
176FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
177else
178FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
179endif
180
169export prefix bindir sharedir sysconfdir DESTDIR 181export prefix bindir sharedir sysconfdir DESTDIR
170 182
171# sparse is architecture-neutral, which means that we need to tell it 183# sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
436 $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null 448 $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
437 449
438$(LIBBPF): fixdep FORCE 450$(LIBBPF): fixdep FORCE
439 $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP) 451 $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
440 452
441$(LIBBPF)-clean: 453$(LIBBPF)-clean:
442 $(call QUIET_CLEAN, libbpf) 454 $(call QUIET_CLEAN, libbpf)
@@ -611,6 +623,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
611 $(python-clean) 623 $(python-clean)
612 624
613# 625#
626# Copy the FEATURE-DUMP file into $(FEATURE_DUMP_COPY)
627# if defined, with no further action.
628feature-dump:
629ifdef FEATURE_DUMP_COPY
630 @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
631 @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
632else
633 @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
634endif
635
636#
614# Trick: if ../../.git does not exist - we are building out of tree for example, 637# Trick: if ../../.git does not exist - we are building out of tree for example,
615# then force version regeneration: 638# then force version regeneration:
616# 639#
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 3e89ba825f6b..7f064eb37158 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -17,7 +17,7 @@ static pid_t spawn(void)
17 if (pid) 17 if (pid)
18 return pid; 18 return pid;
19 19
20 while(1); 20 while(1)
21 sleep(5); 21 sleep(5);
22 return 0; 22 return 0;
23} 23}
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index e5959c136a19..511141b102e8 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
181 181
182EXTLIBS = -lpthread -lrt -lm -ldl 182EXTLIBS = -lpthread -lrt -lm -ldl
183 183
184ifeq ($(FEATURES_DUMP),)
184include $(srctree)/tools/build/Makefile.feature 185include $(srctree)/tools/build/Makefile.feature
186else
187include $(FEATURES_DUMP)
188endif
185 189
186ifeq ($(feature-stackprotector-all), 1) 190ifeq ($(feature-stackprotector-all), 1)
187 CFLAGS += -fstack-protector-all 191 CFLAGS += -fstack-protector-all
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index df38decc48c3..f918015512af 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
5# no target specified, trigger the whole suite 5# no target specified, trigger the whole suite
6all: 6all:
7 @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile 7 @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile
8 @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf 8 @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
9else 9else
10# run only specific test over 'Makefile' 10# run only specific test over 'Makefile'
11%: 11%:
@@ -13,6 +13,26 @@ else
13endif 13endif
14else 14else
15PERF := . 15PERF := .
16PERF_O := $(PERF)
17O_OPT :=
18
19ifneq ($(O),)
20 FULL_O := $(shell readlink -f $(O) || echo $(O))
21 PERF_O := $(FULL_O)
22 ifeq ($(SET_O),1)
23 O_OPT := 'O=$(FULL_O)'
24 endif
25 K_O_OPT := 'O=$(FULL_O)'
26endif
27
28PARALLEL_OPT=
29ifeq ($(SET_PARALLEL),1)
30 cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
31 ifeq ($(cores),0)
32 cores := 1
33 endif
34 PARALLEL_OPT="-j$(cores)"
35endif
16 36
17# As per kernel Makefile, avoid funny character set dependencies 37# As per kernel Makefile, avoid funny character set dependencies
18unexport LC_ALL 38unexport LC_ALL
@@ -156,11 +176,11 @@ test_make_doc := $(test_ok)
156test_make_help_O := $(test_ok) 176test_make_help_O := $(test_ok)
157test_make_doc_O := $(test_ok) 177test_make_doc_O := $(test_ok)
158 178
159test_make_python_perf_so := test -f $(PERF)/python/perf.so 179test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
160 180
161test_make_perf_o := test -f $(PERF)/perf.o 181test_make_perf_o := test -f $(PERF_O)/perf.o
162test_make_util_map_o := test -f $(PERF)/util/map.o 182test_make_util_map_o := test -f $(PERF_O)/util/map.o
163test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o 183test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
164 184
165define test_dest_files 185define test_dest_files
166 for file in $(1); do \ 186 for file in $(1); do \
@@ -227,7 +247,7 @@ test_make_perf_o_O := test -f $$TMP_O/perf.o
227test_make_util_map_o_O := test -f $$TMP_O/util/map.o 247test_make_util_map_o_O := test -f $$TMP_O/util/map.o
228test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o 248test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
229 249
230test_default = test -x $(PERF)/perf 250test_default = test -x $(PERF_O)/perf
231test = $(if $(test_$1),$(test_$1),$(test_default)) 251test = $(if $(test_$1),$(test_$1),$(test_default))
232 252
233test_default_O = test -x $$TMP_O/perf 253test_default_O = test -x $$TMP_O/perf
@@ -247,12 +267,12 @@ endif
247 267
248MAKEFLAGS := --no-print-directory 268MAKEFLAGS := --no-print-directory
249 269
250clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null) 270clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
251 271
252$(run): 272$(run):
253 $(call clean) 273 $(call clean)
254 @TMP_DEST=$$(mktemp -d); \ 274 @TMP_DEST=$$(mktemp -d); \
255 cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \ 275 cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
256 echo "- $@: $$cmd" && echo $$cmd > $@ && \ 276 echo "- $@: $$cmd" && echo $$cmd > $@ && \
257 ( eval $$cmd ) >> $@ 2>&1; \ 277 ( eval $$cmd ) >> $@ 2>&1; \
258 echo " test: $(call test,$@)" >> $@ 2>&1; \ 278 echo " test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +283,7 @@ $(run_O):
263 $(call clean) 283 $(call clean)
264 @TMP_O=$$(mktemp -d); \ 284 @TMP_O=$$(mktemp -d); \
265 TMP_DEST=$$(mktemp -d); \ 285 TMP_DEST=$$(mktemp -d); \
266 cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ 286 cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
267 echo "- $@: $$cmd" && echo $$cmd > $@ && \ 287 echo "- $@: $$cmd" && echo $$cmd > $@ && \
268 ( eval $$cmd ) >> $@ 2>&1 && \ 288 ( eval $$cmd ) >> $@ 2>&1 && \
269 echo " test: $(call test_O,$@)" >> $@ 2>&1; \ 289 echo " test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -276,17 +296,22 @@ tarpkg:
276 ( eval $$cmd ) >> $@ 2>&1 && \ 296 ( eval $$cmd ) >> $@ 2>&1 && \
277 rm -f $@ 297 rm -f $@
278 298
299KERNEL_O := ../..
300ifneq ($(O),)
301 KERNEL_O := $(O)
302endif
303
279make_kernelsrc: 304make_kernelsrc:
280 @echo "- make -C <kernelsrc> tools/perf" 305 @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
281 $(call clean); \ 306 $(call clean); \
282 (make -C ../.. tools/perf) > $@ 2>&1 && \ 307 (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
283 test -x perf && rm -f $@ || (cat $@ ; false) 308 test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
284 309
285make_kernelsrc_tools: 310make_kernelsrc_tools:
286 @echo "- make -C <kernelsrc>/tools perf" 311 @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
287 $(call clean); \ 312 $(call clean); \
288 (make -C ../../tools perf) > $@ 2>&1 && \ 313 (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
289 test -x perf && rm -f $@ || (cat $@ ; false) 314 test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
290 315
291all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools 316all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
292 @echo OK 317 @echo OK
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d4d7cc27252f..718bd46d47fa 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
755 nd = browser->curr_hot; 755 nd = browser->curr_hot;
756 break; 756 break;
757 case K_UNTAB: 757 case K_UNTAB:
758 if (nd != NULL) 758 if (nd != NULL) {
759 nd = rb_next(nd); 759 nd = rb_next(nd);
760 if (nd == NULL) 760 if (nd == NULL)
761 nd = rb_first(&browser->entries); 761 nd = rb_first(&browser->entries);
762 else 762 } else
763 nd = browser->curr_hot; 763 nd = browser->curr_hot;
764 break; 764 break;
765 case K_F1: 765 case K_F1:
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c226303e3da0..68a7612019dc 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
131 symlen = unresolved_col_width + 4 + 2; 131 symlen = unresolved_col_width + 4 + 2;
132 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, 132 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
133 symlen); 133 symlen);
134 hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
135 symlen);
134 } 136 }
135 137
136 if (h->mem_info->iaddr.sym) { 138 if (h->mem_info->iaddr.sym) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d5636ba94b20..40b7a0d0905b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
1149 1149
1150 machine = machines__find(machines, pid); 1150 machine = machines__find(machines, pid);
1151 if (!machine) 1151 if (!machine)
1152 machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); 1152 machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
1153 return machine; 1153 return machine;
1154 } 1154 }
1155 1155
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2f901d15e063..2b58edccd56f 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
310 int i, ret; 310 int i, ret;
311 311
312 aggr->val = aggr->ena = aggr->run = 0; 312 aggr->val = aggr->ena = aggr->run = 0;
313 init_stats(ps->res_stats);
314 313
315 if (counter->per_pkg) 314 if (counter->per_pkg)
316 zero_per_pkg(counter); 315 zero_per_pkg(counter);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3b2de6eb3376..ab02209a7cf3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
1466 * Read the build id if possible. This is required for 1466 * Read the build id if possible. This is required for
1467 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work 1467 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
1468 */ 1468 */
1469 if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0) 1469 if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
1470 dso__set_build_id(dso, build_id); 1470 dso__set_build_id(dso, build_id);
1471 1471
1472 /* 1472 /*