Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- kernel/perf_event.c | 1070
1 file changed, 901 insertions, 169 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 999835b6112b..8e81a9860a0d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,13 +38,96 @@ | |||
38 | 38 | ||
39 | #include <asm/irq_regs.h> | 39 | #include <asm/irq_regs.h> |
40 | 40 | ||
41 | struct remote_function_call { | ||
42 | struct task_struct *p; | ||
43 | int (*func)(void *info); | ||
44 | void *info; | ||
45 | int ret; | ||
46 | }; | ||
47 | |||
48 | static void remote_function(void *data) | ||
49 | { | ||
50 | struct remote_function_call *tfc = data; | ||
51 | struct task_struct *p = tfc->p; | ||
52 | |||
53 | if (p) { | ||
54 | tfc->ret = -EAGAIN; | ||
55 | if (task_cpu(p) != smp_processor_id() || !task_curr(p)) | ||
56 | return; | ||
57 | } | ||
58 | |||
59 | tfc->ret = tfc->func(tfc->info); | ||
60 | } | ||
61 | |||
62 | /** | ||
63 | * task_function_call - call a function on the cpu on which a task runs | ||
64 | * @p: the task to evaluate | ||
65 | * @func: the function to be called | ||
66 | * @info: the function call argument | ||
67 | * | ||
68 | * Calls the function @func when the task is currently running. This might | ||
69 | * be on the current CPU, which just calls the function directly | ||
70 | * | ||
71 | * returns: @func return value, or | ||
72 | * -ESRCH - when the process isn't running | ||
73 | * -EAGAIN - when the process moved away | ||
74 | */ | ||
75 | static int | ||
76 | task_function_call(struct task_struct *p, int (*func) (void *info), void *info) | ||
77 | { | ||
78 | struct remote_function_call data = { | ||
79 | .p = p, | ||
80 | .func = func, | ||
81 | .info = info, | ||
82 | .ret = -ESRCH, /* No such (running) process */ | ||
83 | }; | ||
84 | |||
85 | if (task_curr(p)) | ||
86 | smp_call_function_single(task_cpu(p), remote_function, &data, 1); | ||
87 | |||
88 | return data.ret; | ||
89 | } | ||
90 | |||
91 | /** | ||
92 | * cpu_function_call - call a function on the cpu | ||
93 | * @func: the function to be called | ||
94 | * @info: the function call argument | ||
95 | * | ||
96 | * Calls the function @func on the remote cpu. | ||
97 | * | ||
98 | * returns: @func return value or -ENXIO when the cpu is offline | ||
99 | */ | ||
100 | static int cpu_function_call(int cpu, int (*func) (void *info), void *info) | ||
101 | { | ||
102 | struct remote_function_call data = { | ||
103 | .p = NULL, | ||
104 | .func = func, | ||
105 | .info = info, | ||
106 | .ret = -ENXIO, /* No such CPU */ | ||
107 | }; | ||
108 | |||
109 | smp_call_function_single(cpu, remote_function, &data, 1); | ||
110 | |||
111 | return data.ret; | ||
112 | } | ||
113 | |||
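The two helpers above return the callee's result, or -ESRCH/-EAGAIN (task not running, or moved away) and -ENXIO (CPU offline) when the call could not be delivered. The callers converted later in this patch (perf_remove_from_context(), perf_install_in_context(), perf_event_enable()) all wrap them in the same retry loop; a condensed, hypothetical sketch of that pattern:

	static void modify_ctx_on_task_cpu(struct perf_event *event,
					   int (*func)(void *info))
	{
		struct perf_event_context *ctx = event->ctx;
		struct task_struct *task = ctx->task;

	retry:
		/* run func() on the CPU the task is currently running on */
		if (!task_function_call(task, func, event))
			return;

		raw_spin_lock_irq(&ctx->lock);
		if (ctx->is_active) {
			/* the task was scheduled back in; try the IPI again */
			raw_spin_unlock_irq(&ctx->lock);
			goto retry;
		}
		/*
		 * The task is not running and ctx->lock prevents it from
		 * being scheduled in, so the context can be modified here.
		 */
		raw_spin_unlock_irq(&ctx->lock);
	}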
114 | #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ | ||
115 | PERF_FLAG_FD_OUTPUT |\ | ||
116 | PERF_FLAG_PID_CGROUP) | ||
117 | |||
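PERF_FLAG_ALL gathers every flag sys_perf_event_open() understands; presumably the syscall masks against it to reject unknown bits early (that check sits outside this excerpt):

	/* illustrative only; the actual check is not part of this hunk */
	if (flags & ~PERF_FLAG_ALL)
		return -EINVAL;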
41 | enum event_type_t { | 118 | enum event_type_t { |
42 | EVENT_FLEXIBLE = 0x1, | 119 | EVENT_FLEXIBLE = 0x1, |
43 | EVENT_PINNED = 0x2, | 120 | EVENT_PINNED = 0x2, |
44 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | 121 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, |
45 | }; | 122 | }; |
46 | 123 | ||
47 | atomic_t perf_task_events __read_mostly; | 124 | /* |
125 | * perf_sched_events : >0 events exist | ||
126 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu | ||
127 | */ | ||
128 | atomic_t perf_sched_events __read_mostly; | ||
129 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | ||
130 | |||
48 | static atomic_t nr_mmap_events __read_mostly; | 131 | static atomic_t nr_mmap_events __read_mostly; |
49 | static atomic_t nr_comm_events __read_mostly; | 132 | static atomic_t nr_comm_events __read_mostly; |
50 | static atomic_t nr_task_events __read_mostly; | 133 | static atomic_t nr_task_events __read_mostly; |
@@ -62,12 +145,30 @@ static struct srcu_struct pmus_srcu; | |||
62 | */ | 145 | */ |
63 | int sysctl_perf_event_paranoid __read_mostly = 1; | 146 | int sysctl_perf_event_paranoid __read_mostly = 1; |
64 | 147 | ||
65 | int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ | 148 | /* Minimum for 512 kiB + 1 user control page */ |
149 | int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ | ||
66 | 150 | ||
67 | /* | 151 | /* |
68 | * max perf event sample rate | 152 | * max perf event sample rate |
69 | */ | 153 | */ |
70 | int sysctl_perf_event_sample_rate __read_mostly = 100000; | 154 | #define DEFAULT_MAX_SAMPLE_RATE 100000 |
155 | int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; | ||
156 | static int max_samples_per_tick __read_mostly = | ||
157 | DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); | ||
158 | |||
159 | int perf_proc_update_handler(struct ctl_table *table, int write, | ||
160 | void __user *buffer, size_t *lenp, | ||
161 | loff_t *ppos) | ||
162 | { | ||
163 | int ret = proc_dointvec(table, write, buffer, lenp, ppos); | ||
164 | |||
165 | if (ret || !write) | ||
166 | return ret; | ||
167 | |||
168 | max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); | ||
169 | |||
170 | return 0; | ||
171 | } | ||
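The handler recomputes max_samples_per_tick every time the sysctl is written, so the throttling limit tracks the configured sample rate. It is hooked up from the perf_event_max_sample_rate entry in kernel/sysctl.c; roughly (table entry reproduced for illustration, it lives outside this diff):

	{
		.procname	= "perf_event_max_sample_rate",
		.data		= &sysctl_perf_event_sample_rate,
		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
		.mode		= 0644,
		.proc_handler	= perf_proc_update_handler,
	},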
71 | 172 | ||
72 | static atomic64_t perf_event_id; | 173 | static atomic64_t perf_event_id; |
73 | 174 | ||
@@ -75,7 +176,11 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | |||
75 | enum event_type_t event_type); | 176 | enum event_type_t event_type); |
76 | 177 | ||
77 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | 178 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, |
78 | enum event_type_t event_type); | 179 | enum event_type_t event_type, |
180 | struct task_struct *task); | ||
181 | |||
182 | static void update_context_time(struct perf_event_context *ctx); | ||
183 | static u64 perf_event_time(struct perf_event *event); | ||
79 | 184 | ||
80 | void __weak perf_event_print_debug(void) { } | 185 | void __weak perf_event_print_debug(void) { } |
81 | 186 | ||
@@ -89,6 +194,361 @@ static inline u64 perf_clock(void) | |||
89 | return local_clock(); | 194 | return local_clock(); |
90 | } | 195 | } |
91 | 196 | ||
197 | static inline struct perf_cpu_context * | ||
198 | __get_cpu_context(struct perf_event_context *ctx) | ||
199 | { | ||
200 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); | ||
201 | } | ||
202 | |||
203 | #ifdef CONFIG_CGROUP_PERF | ||
204 | |||
205 | /* | ||
206 | * Must ensure cgroup is pinned (css_get) before calling | ||
207 | * this function. In other words, we cannot call this function | ||
208 | * if there is no cgroup event for the current CPU context. | ||
209 | */ | ||
210 | static inline struct perf_cgroup * | ||
211 | perf_cgroup_from_task(struct task_struct *task) | ||
212 | { | ||
213 | return container_of(task_subsys_state(task, perf_subsys_id), | ||
214 | struct perf_cgroup, css); | ||
215 | } | ||
216 | |||
217 | static inline bool | ||
218 | perf_cgroup_match(struct perf_event *event) | ||
219 | { | ||
220 | struct perf_event_context *ctx = event->ctx; | ||
221 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
222 | |||
223 | return !event->cgrp || event->cgrp == cpuctx->cgrp; | ||
224 | } | ||
225 | |||
226 | static inline void perf_get_cgroup(struct perf_event *event) | ||
227 | { | ||
228 | css_get(&event->cgrp->css); | ||
229 | } | ||
230 | |||
231 | static inline void perf_put_cgroup(struct perf_event *event) | ||
232 | { | ||
233 | css_put(&event->cgrp->css); | ||
234 | } | ||
235 | |||
236 | static inline void perf_detach_cgroup(struct perf_event *event) | ||
237 | { | ||
238 | perf_put_cgroup(event); | ||
239 | event->cgrp = NULL; | ||
240 | } | ||
241 | |||
242 | static inline int is_cgroup_event(struct perf_event *event) | ||
243 | { | ||
244 | return event->cgrp != NULL; | ||
245 | } | ||
246 | |||
247 | static inline u64 perf_cgroup_event_time(struct perf_event *event) | ||
248 | { | ||
249 | struct perf_cgroup_info *t; | ||
250 | |||
251 | t = per_cpu_ptr(event->cgrp->info, event->cpu); | ||
252 | return t->time; | ||
253 | } | ||
254 | |||
255 | static inline void __update_cgrp_time(struct perf_cgroup *cgrp) | ||
256 | { | ||
257 | struct perf_cgroup_info *info; | ||
258 | u64 now; | ||
259 | |||
260 | now = perf_clock(); | ||
261 | |||
262 | info = this_cpu_ptr(cgrp->info); | ||
263 | |||
264 | info->time += now - info->timestamp; | ||
265 | info->timestamp = now; | ||
266 | } | ||
267 | |||
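__update_cgrp_time() uses the usual delta-accumulation scheme: every call folds the wall time elapsed since the last snapshot into info->time and advances the snapshot. A small worked example (nanoseconds, values invented for illustration):

	/*
	 * switch-in:            timestamp = 1000, time = 0
	 * update at now = 1500: time += 500  ->  time = 500, timestamp = 1500
	 * update at now = 1800: time += 300  ->  time = 800, timestamp = 1800
	 *
	 * 'time' therefore tracks how long the cgroup has been active on
	 * this CPU, no matter how often it is sampled in between.
	 */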
268 | static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) | ||
269 | { | ||
270 | struct perf_cgroup *cgrp_out = cpuctx->cgrp; | ||
271 | if (cgrp_out) | ||
272 | __update_cgrp_time(cgrp_out); | ||
273 | } | ||
274 | |||
275 | static inline void update_cgrp_time_from_event(struct perf_event *event) | ||
276 | { | ||
277 | struct perf_cgroup *cgrp; | ||
278 | |||
279 | /* | ||
280 | * ensure we access cgroup data only when needed and | ||
281 | * when we know the cgroup is pinned (css_get) | ||
282 | */ | ||
283 | if (!is_cgroup_event(event)) | ||
284 | return; | ||
285 | |||
286 | cgrp = perf_cgroup_from_task(current); | ||
287 | /* | ||
288 | * Do not update time when cgroup is not active | ||
289 | */ | ||
290 | if (cgrp == event->cgrp) | ||
291 | __update_cgrp_time(event->cgrp); | ||
292 | } | ||
293 | |||
294 | static inline void | ||
295 | perf_cgroup_set_timestamp(struct task_struct *task, | ||
296 | struct perf_event_context *ctx) | ||
297 | { | ||
298 | struct perf_cgroup *cgrp; | ||
299 | struct perf_cgroup_info *info; | ||
300 | |||
301 | /* | ||
302 | * ctx->lock held by caller | ||
303 | * ensure we do not access cgroup data | ||
304 | * unless we have the cgroup pinned (css_get) | ||
305 | */ | ||
306 | if (!task || !ctx->nr_cgroups) | ||
307 | return; | ||
308 | |||
309 | cgrp = perf_cgroup_from_task(task); | ||
310 | info = this_cpu_ptr(cgrp->info); | ||
311 | info->timestamp = ctx->timestamp; | ||
312 | } | ||
313 | |||
314 | #define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ | ||
315 | #define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */ | ||
316 | |||
317 | /* | ||
318 | * reschedule events based on the cgroup constraint of task. | ||
319 | * | ||
320 | * mode SWOUT : schedule out everything | ||
321 | * mode SWIN : schedule in based on cgroup for next | ||
322 | */ | ||
323 | void perf_cgroup_switch(struct task_struct *task, int mode) | ||
324 | { | ||
325 | struct perf_cpu_context *cpuctx; | ||
326 | struct pmu *pmu; | ||
327 | unsigned long flags; | ||
328 | |||
329 | /* | ||
330 | * disable interrupts to avoid getting nr_cgroup | ||
331 | * changes via __perf_event_disable(). Also | ||
332 | * avoids preemption. | ||
333 | */ | ||
334 | local_irq_save(flags); | ||
335 | |||
336 | /* | ||
337 | * we reschedule only in the presence of cgroup | ||
338 | * constrained events. | ||
339 | */ | ||
340 | rcu_read_lock(); | ||
341 | |||
342 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
343 | |||
344 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
345 | |||
346 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
347 | |||
348 | /* | ||
349 | * perf_cgroup_events says at least one | ||
350 | * context on this CPU has cgroup events. | ||
351 | * | ||
352 | * ctx->nr_cgroups reports the number of cgroup | ||
353 | * events for a context. | ||
354 | */ | ||
355 | if (cpuctx->ctx.nr_cgroups > 0) { | ||
356 | |||
357 | if (mode & PERF_CGROUP_SWOUT) { | ||
358 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); | ||
359 | /* | ||
360 | * must not be done before ctxswout due | ||
361 | * to event_filter_match() in event_sched_out() | ||
362 | */ | ||
363 | cpuctx->cgrp = NULL; | ||
364 | } | ||
365 | |||
366 | if (mode & PERF_CGROUP_SWIN) { | ||
367 | WARN_ON_ONCE(cpuctx->cgrp); | ||
368 | /* set cgrp before ctxsw in to | ||
369 | * allow event_filter_match() to not | ||
370 | * have to pass task around | ||
371 | */ | ||
372 | cpuctx->cgrp = perf_cgroup_from_task(task); | ||
373 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); | ||
374 | } | ||
375 | } | ||
376 | |||
377 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
378 | } | ||
379 | |||
380 | rcu_read_unlock(); | ||
381 | |||
382 | local_irq_restore(flags); | ||
383 | } | ||
384 | |||
385 | static inline void perf_cgroup_sched_out(struct task_struct *task) | ||
386 | { | ||
387 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT); | ||
388 | } | ||
389 | |||
390 | static inline void perf_cgroup_sched_in(struct task_struct *task) | ||
391 | { | ||
392 | perf_cgroup_switch(task, PERF_CGROUP_SWIN); | ||
393 | } | ||
394 | |||
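Both wrappers funnel into perf_cgroup_switch(); the scheduler hooks added further down only call them when the per-cpu perf_cgroup_events counter is non-zero, so tasks without cgroup events never pay for the RCU walk over all PMUs:

	/* call-site shape, as added to __perf_event_task_sched_out/in below */
	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
		perf_cgroup_sched_out(task);	/* or perf_cgroup_sched_in(task) */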
395 | static inline int perf_cgroup_connect(int fd, struct perf_event *event, | ||
396 | struct perf_event_attr *attr, | ||
397 | struct perf_event *group_leader) | ||
398 | { | ||
399 | struct perf_cgroup *cgrp; | ||
400 | struct cgroup_subsys_state *css; | ||
401 | struct file *file; | ||
402 | int ret = 0, fput_needed; | ||
403 | |||
404 | file = fget_light(fd, &fput_needed); | ||
405 | if (!file) | ||
406 | return -EBADF; | ||
407 | |||
408 | css = cgroup_css_from_dir(file, perf_subsys_id); | ||
409 | if (IS_ERR(css)) { | ||
410 | ret = PTR_ERR(css); | ||
411 | goto out; | ||
412 | } | ||
413 | |||
414 | cgrp = container_of(css, struct perf_cgroup, css); | ||
415 | event->cgrp = cgrp; | ||
416 | |||
417 | /* must be done before we fput() the file */ | ||
418 | perf_get_cgroup(event); | ||
419 | |||
420 | /* | ||
421 | * all events in a group must monitor | ||
422 | * the same cgroup because a task belongs | ||
423 | * to only one perf cgroup at a time | ||
424 | */ | ||
425 | if (group_leader && group_leader->cgrp != cgrp) { | ||
426 | perf_detach_cgroup(event); | ||
427 | ret = -EINVAL; | ||
428 | } | ||
429 | out: | ||
430 | fput_light(file, fput_needed); | ||
431 | return ret; | ||
432 | } | ||
433 | |||
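perf_cgroup_connect() treats its first argument as an open file descriptor for a cgroup directory; with PERF_FLAG_PID_CGROUP set, the pid argument of the syscall presumably carries that fd instead of a task pid. A hypothetical call-site shape (the real one is outside this excerpt):

	if (flags & PERF_FLAG_PID_CGROUP) {
		err = perf_cgroup_connect(pid, event, attr, group_leader);
		if (err)
			goto err_put_context;	/* hypothetical error label */
	}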
434 | static inline void | ||
435 | perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) | ||
436 | { | ||
437 | struct perf_cgroup_info *t; | ||
438 | t = per_cpu_ptr(event->cgrp->info, event->cpu); | ||
439 | event->shadow_ctx_time = now - t->timestamp; | ||
440 | } | ||
441 | |||
442 | static inline void | ||
443 | perf_cgroup_defer_enabled(struct perf_event *event) | ||
444 | { | ||
445 | /* | ||
446 | * when the current task's perf cgroup does not match | ||
447 | * the event's, we need to remember to call the | ||
448 | * perf_mark_enable() function the first time a task with | ||
449 | * a matching perf cgroup is scheduled in. | ||
450 | */ | ||
451 | if (is_cgroup_event(event) && !perf_cgroup_match(event)) | ||
452 | event->cgrp_defer_enabled = 1; | ||
453 | } | ||
454 | |||
455 | static inline void | ||
456 | perf_cgroup_mark_enabled(struct perf_event *event, | ||
457 | struct perf_event_context *ctx) | ||
458 | { | ||
459 | struct perf_event *sub; | ||
460 | u64 tstamp = perf_event_time(event); | ||
461 | |||
462 | if (!event->cgrp_defer_enabled) | ||
463 | return; | ||
464 | |||
465 | event->cgrp_defer_enabled = 0; | ||
466 | |||
467 | event->tstamp_enabled = tstamp - event->total_time_enabled; | ||
468 | list_for_each_entry(sub, &event->sibling_list, group_entry) { | ||
469 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { | ||
470 | sub->tstamp_enabled = tstamp - sub->total_time_enabled; | ||
471 | sub->cgrp_defer_enabled = 0; | ||
472 | } | ||
473 | } | ||
474 | } | ||
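Deferral means a cgroup event whose cgroup is not current when it gets enabled is only marked, not started; the sched-in paths added later call perf_cgroup_mark_enabled() so tstamp_enabled is reset to now minus total_time_enabled and no enabled time is charged for the period the cgroup was scheduled out:

	/* as used in ctx_pinned_sched_in()/ctx_flexible_sched_in() below */
	if (is_cgroup_event(event))
		perf_cgroup_mark_enabled(event, ctx);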
475 | #else /* !CONFIG_CGROUP_PERF */ | ||
476 | |||
477 | static inline bool | ||
478 | perf_cgroup_match(struct perf_event *event) | ||
479 | { | ||
480 | return true; | ||
481 | } | ||
482 | |||
483 | static inline void perf_detach_cgroup(struct perf_event *event) | ||
484 | {} | ||
485 | |||
486 | static inline int is_cgroup_event(struct perf_event *event) | ||
487 | { | ||
488 | return 0; | ||
489 | } | ||
490 | |||
491 | static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event) | ||
492 | { | ||
493 | return 0; | ||
494 | } | ||
495 | |||
496 | static inline void update_cgrp_time_from_event(struct perf_event *event) | ||
497 | { | ||
498 | } | ||
499 | |||
500 | static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) | ||
501 | { | ||
502 | } | ||
503 | |||
504 | static inline void perf_cgroup_sched_out(struct task_struct *task) | ||
505 | { | ||
506 | } | ||
507 | |||
508 | static inline void perf_cgroup_sched_in(struct task_struct *task) | ||
509 | { | ||
510 | } | ||
511 | |||
512 | static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, | ||
513 | struct perf_event_attr *attr, | ||
514 | struct perf_event *group_leader) | ||
515 | { | ||
516 | return -EINVAL; | ||
517 | } | ||
518 | |||
519 | static inline void | ||
520 | perf_cgroup_set_timestamp(struct task_struct *task, | ||
521 | struct perf_event_context *ctx) | ||
522 | { | ||
523 | } | ||
524 | |||
525 | void | ||
526 | perf_cgroup_switch(struct task_struct *task, struct task_struct *next) | ||
527 | { | ||
528 | } | ||
529 | |||
530 | static inline void | ||
531 | perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) | ||
532 | { | ||
533 | } | ||
534 | |||
535 | static inline u64 perf_cgroup_event_time(struct perf_event *event) | ||
536 | { | ||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | static inline void | ||
541 | perf_cgroup_defer_enabled(struct perf_event *event) | ||
542 | { | ||
543 | } | ||
544 | |||
545 | static inline void | ||
546 | perf_cgroup_mark_enabled(struct perf_event *event, | ||
547 | struct perf_event_context *ctx) | ||
548 | { | ||
549 | } | ||
550 | #endif | ||
551 | |||
92 | void perf_pmu_disable(struct pmu *pmu) | 552 | void perf_pmu_disable(struct pmu *pmu) |
93 | { | 553 | { |
94 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 554 | int *count = this_cpu_ptr(pmu->pmu_disable_count); |
@@ -254,7 +714,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
254 | raw_spin_lock_irqsave(&ctx->lock, flags); | 714 | raw_spin_lock_irqsave(&ctx->lock, flags); |
255 | --ctx->pin_count; | 715 | --ctx->pin_count; |
256 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 716 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
257 | put_ctx(ctx); | ||
258 | } | 717 | } |
259 | 718 | ||
260 | /* | 719 | /* |
@@ -271,6 +730,10 @@ static void update_context_time(struct perf_event_context *ctx) | |||
271 | static u64 perf_event_time(struct perf_event *event) | 730 | static u64 perf_event_time(struct perf_event *event) |
272 | { | 731 | { |
273 | struct perf_event_context *ctx = event->ctx; | 732 | struct perf_event_context *ctx = event->ctx; |
733 | |||
734 | if (is_cgroup_event(event)) | ||
735 | return perf_cgroup_event_time(event); | ||
736 | |||
274 | return ctx ? ctx->time : 0; | 737 | return ctx ? ctx->time : 0; |
275 | } | 738 | } |
276 | 739 | ||
@@ -285,9 +748,20 @@ static void update_event_times(struct perf_event *event) | |||
285 | if (event->state < PERF_EVENT_STATE_INACTIVE || | 748 | if (event->state < PERF_EVENT_STATE_INACTIVE || |
286 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | 749 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) |
287 | return; | 750 | return; |
288 | 751 | /* | |
289 | if (ctx->is_active) | 752 | * in cgroup mode, time_enabled represents |
753 | * the time the event was enabled AND active | ||
754 | * tasks were in the monitored cgroup. This is | ||
755 | * independent of the activity of the context as | ||
756 | * there may be a mix of cgroup and non-cgroup events. | ||
757 | * | ||
758 | * That is why we treat cgroup events differently | ||
759 | * here. | ||
760 | */ | ||
761 | if (is_cgroup_event(event)) | ||
290 | run_end = perf_event_time(event); | 762 | run_end = perf_event_time(event); |
763 | else if (ctx->is_active) | ||
764 | run_end = ctx->time; | ||
291 | else | 765 | else |
292 | run_end = event->tstamp_stopped; | 766 | run_end = event->tstamp_stopped; |
293 | 767 | ||
@@ -299,6 +773,7 @@ static void update_event_times(struct perf_event *event) | |||
299 | run_end = perf_event_time(event); | 773 | run_end = perf_event_time(event); |
300 | 774 | ||
301 | event->total_time_running = run_end - event->tstamp_running; | 775 | event->total_time_running = run_end - event->tstamp_running; |
776 | |||
302 | } | 777 | } |
303 | 778 | ||
304 | /* | 779 | /* |
@@ -347,6 +822,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
347 | list_add_tail(&event->group_entry, list); | 822 | list_add_tail(&event->group_entry, list); |
348 | } | 823 | } |
349 | 824 | ||
825 | if (is_cgroup_event(event)) | ||
826 | ctx->nr_cgroups++; | ||
827 | |||
350 | list_add_rcu(&event->event_entry, &ctx->event_list); | 828 | list_add_rcu(&event->event_entry, &ctx->event_list); |
351 | if (!ctx->nr_events) | 829 | if (!ctx->nr_events) |
352 | perf_pmu_rotate_start(ctx->pmu); | 830 | perf_pmu_rotate_start(ctx->pmu); |
@@ -465,6 +943,7 @@ static void perf_group_attach(struct perf_event *event) | |||
465 | static void | 943 | static void |
466 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 944 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
467 | { | 945 | { |
946 | struct perf_cpu_context *cpuctx; | ||
468 | /* | 947 | /* |
469 | * We can have double detach due to exit/hot-unplug + close. | 948 | * We can have double detach due to exit/hot-unplug + close. |
470 | */ | 949 | */ |
@@ -473,6 +952,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
473 | 952 | ||
474 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | 953 | event->attach_state &= ~PERF_ATTACH_CONTEXT; |
475 | 954 | ||
955 | if (is_cgroup_event(event)) { | ||
956 | ctx->nr_cgroups--; | ||
957 | cpuctx = __get_cpu_context(ctx); | ||
958 | /* | ||
959 | * if there are no more cgroup events | ||
960 | * then clear cgrp to avoid a stale pointer | ||
961 | * in update_cgrp_time_from_cpuctx() | ||
962 | */ | ||
963 | if (!ctx->nr_cgroups) | ||
964 | cpuctx->cgrp = NULL; | ||
965 | } | ||
966 | |||
476 | ctx->nr_events--; | 967 | ctx->nr_events--; |
477 | if (event->attr.inherit_stat) | 968 | if (event->attr.inherit_stat) |
478 | ctx->nr_stat--; | 969 | ctx->nr_stat--; |
@@ -544,7 +1035,8 @@ out: | |||
544 | static inline int | 1035 | static inline int |
545 | event_filter_match(struct perf_event *event) | 1036 | event_filter_match(struct perf_event *event) |
546 | { | 1037 | { |
547 | return event->cpu == -1 || event->cpu == smp_processor_id(); | 1038 | return (event->cpu == -1 || event->cpu == smp_processor_id()) |
1039 | && perf_cgroup_match(event); | ||
548 | } | 1040 | } |
549 | 1041 | ||
550 | static void | 1042 | static void |
@@ -562,7 +1054,7 @@ event_sched_out(struct perf_event *event, | |||
562 | */ | 1054 | */ |
563 | if (event->state == PERF_EVENT_STATE_INACTIVE | 1055 | if (event->state == PERF_EVENT_STATE_INACTIVE |
564 | && !event_filter_match(event)) { | 1056 | && !event_filter_match(event)) { |
565 | delta = ctx->time - event->tstamp_stopped; | 1057 | delta = tstamp - event->tstamp_stopped; |
566 | event->tstamp_running += delta; | 1058 | event->tstamp_running += delta; |
567 | event->tstamp_stopped = tstamp; | 1059 | event->tstamp_stopped = tstamp; |
568 | } | 1060 | } |
@@ -606,47 +1098,30 @@ group_sched_out(struct perf_event *group_event, | |||
606 | cpuctx->exclusive = 0; | 1098 | cpuctx->exclusive = 0; |
607 | } | 1099 | } |
608 | 1100 | ||
609 | static inline struct perf_cpu_context * | ||
610 | __get_cpu_context(struct perf_event_context *ctx) | ||
611 | { | ||
612 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); | ||
613 | } | ||
614 | |||
615 | /* | 1101 | /* |
616 | * Cross CPU call to remove a performance event | 1102 | * Cross CPU call to remove a performance event |
617 | * | 1103 | * |
618 | * We disable the event on the hardware level first. After that we | 1104 | * We disable the event on the hardware level first. After that we |
619 | * remove it from the context list. | 1105 | * remove it from the context list. |
620 | */ | 1106 | */ |
621 | static void __perf_event_remove_from_context(void *info) | 1107 | static int __perf_remove_from_context(void *info) |
622 | { | 1108 | { |
623 | struct perf_event *event = info; | 1109 | struct perf_event *event = info; |
624 | struct perf_event_context *ctx = event->ctx; | 1110 | struct perf_event_context *ctx = event->ctx; |
625 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1111 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
626 | 1112 | ||
627 | /* | ||
628 | * If this is a task context, we need to check whether it is | ||
629 | * the current task context of this cpu. If not it has been | ||
630 | * scheduled out before the smp call arrived. | ||
631 | */ | ||
632 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
633 | return; | ||
634 | |||
635 | raw_spin_lock(&ctx->lock); | 1113 | raw_spin_lock(&ctx->lock); |
636 | |||
637 | event_sched_out(event, cpuctx, ctx); | 1114 | event_sched_out(event, cpuctx, ctx); |
638 | |||
639 | list_del_event(event, ctx); | 1115 | list_del_event(event, ctx); |
640 | |||
641 | raw_spin_unlock(&ctx->lock); | 1116 | raw_spin_unlock(&ctx->lock); |
1117 | |||
1118 | return 0; | ||
642 | } | 1119 | } |
643 | 1120 | ||
644 | 1121 | ||
645 | /* | 1122 | /* |
646 | * Remove the event from a task's (or a CPU's) list of events. | 1123 | * Remove the event from a task's (or a CPU's) list of events. |
647 | * | 1124 | * |
648 | * Must be called with ctx->mutex held. | ||
649 | * | ||
650 | * CPU events are removed with a smp call. For task events we only | 1125 | * CPU events are removed with a smp call. For task events we only |
651 | * call when the task is on a CPU. | 1126 | * call when the task is on a CPU. |
652 | * | 1127 | * |
@@ -657,49 +1132,48 @@ static void __perf_event_remove_from_context(void *info) | |||
657 | * When called from perf_event_exit_task, it's OK because the | 1132 | * When called from perf_event_exit_task, it's OK because the |
658 | * context has been detached from its task. | 1133 | * context has been detached from its task. |
659 | */ | 1134 | */ |
660 | static void perf_event_remove_from_context(struct perf_event *event) | 1135 | static void perf_remove_from_context(struct perf_event *event) |
661 | { | 1136 | { |
662 | struct perf_event_context *ctx = event->ctx; | 1137 | struct perf_event_context *ctx = event->ctx; |
663 | struct task_struct *task = ctx->task; | 1138 | struct task_struct *task = ctx->task; |
664 | 1139 | ||
1140 | lockdep_assert_held(&ctx->mutex); | ||
1141 | |||
665 | if (!task) { | 1142 | if (!task) { |
666 | /* | 1143 | /* |
667 | * Per cpu events are removed via an smp call and | 1144 | * Per cpu events are removed via an smp call and |
668 | * the removal is always successful. | 1145 | * the removal is always successful. |
669 | */ | 1146 | */ |
670 | smp_call_function_single(event->cpu, | 1147 | cpu_function_call(event->cpu, __perf_remove_from_context, event); |
671 | __perf_event_remove_from_context, | ||
672 | event, 1); | ||
673 | return; | 1148 | return; |
674 | } | 1149 | } |
675 | 1150 | ||
676 | retry: | 1151 | retry: |
677 | task_oncpu_function_call(task, __perf_event_remove_from_context, | 1152 | if (!task_function_call(task, __perf_remove_from_context, event)) |
678 | event); | 1153 | return; |
679 | 1154 | ||
680 | raw_spin_lock_irq(&ctx->lock); | 1155 | raw_spin_lock_irq(&ctx->lock); |
681 | /* | 1156 | /* |
682 | * If the context is active we need to retry the smp call. | 1157 | * If we failed to find a running task, but find the context active now |
1158 | * that we've acquired the ctx->lock, retry. | ||
683 | */ | 1159 | */ |
684 | if (ctx->nr_active && !list_empty(&event->group_entry)) { | 1160 | if (ctx->is_active) { |
685 | raw_spin_unlock_irq(&ctx->lock); | 1161 | raw_spin_unlock_irq(&ctx->lock); |
686 | goto retry; | 1162 | goto retry; |
687 | } | 1163 | } |
688 | 1164 | ||
689 | /* | 1165 | /* |
690 | * The lock prevents that this context is scheduled in so we | 1166 | * Since the task isn't running, it's safe to remove the event, us |
691 | * can remove the event safely, if the call above did not | 1167 | * holding the ctx->lock ensures the task won't get scheduled in. |
692 | * succeed. | ||
693 | */ | 1168 | */ |
694 | if (!list_empty(&event->group_entry)) | 1169 | list_del_event(event, ctx); |
695 | list_del_event(event, ctx); | ||
696 | raw_spin_unlock_irq(&ctx->lock); | 1170 | raw_spin_unlock_irq(&ctx->lock); |
697 | } | 1171 | } |
698 | 1172 | ||
699 | /* | 1173 | /* |
700 | * Cross CPU call to disable a performance event | 1174 | * Cross CPU call to disable a performance event |
701 | */ | 1175 | */ |
702 | static void __perf_event_disable(void *info) | 1176 | static int __perf_event_disable(void *info) |
703 | { | 1177 | { |
704 | struct perf_event *event = info; | 1178 | struct perf_event *event = info; |
705 | struct perf_event_context *ctx = event->ctx; | 1179 | struct perf_event_context *ctx = event->ctx; |
@@ -708,9 +1182,12 @@ static void __perf_event_disable(void *info) | |||
708 | /* | 1182 | /* |
709 | * If this is a per-task event, need to check whether this | 1183 | * If this is a per-task event, need to check whether this |
710 | * event's task is the current task on this cpu. | 1184 | * event's task is the current task on this cpu. |
1185 | * | ||
1186 | * Can trigger due to concurrent perf_event_context_sched_out() | ||
1187 | * flipping contexts around. | ||
711 | */ | 1188 | */ |
712 | if (ctx->task && cpuctx->task_ctx != ctx) | 1189 | if (ctx->task && cpuctx->task_ctx != ctx) |
713 | return; | 1190 | return -EINVAL; |
714 | 1191 | ||
715 | raw_spin_lock(&ctx->lock); | 1192 | raw_spin_lock(&ctx->lock); |
716 | 1193 | ||
@@ -720,6 +1197,7 @@ static void __perf_event_disable(void *info) | |||
720 | */ | 1197 | */ |
721 | if (event->state >= PERF_EVENT_STATE_INACTIVE) { | 1198 | if (event->state >= PERF_EVENT_STATE_INACTIVE) { |
722 | update_context_time(ctx); | 1199 | update_context_time(ctx); |
1200 | update_cgrp_time_from_event(event); | ||
723 | update_group_times(event); | 1201 | update_group_times(event); |
724 | if (event == event->group_leader) | 1202 | if (event == event->group_leader) |
725 | group_sched_out(event, cpuctx, ctx); | 1203 | group_sched_out(event, cpuctx, ctx); |
@@ -729,6 +1207,8 @@ static void __perf_event_disable(void *info) | |||
729 | } | 1207 | } |
730 | 1208 | ||
731 | raw_spin_unlock(&ctx->lock); | 1209 | raw_spin_unlock(&ctx->lock); |
1210 | |||
1211 | return 0; | ||
732 | } | 1212 | } |
733 | 1213 | ||
734 | /* | 1214 | /* |
@@ -753,13 +1233,13 @@ void perf_event_disable(struct perf_event *event) | |||
753 | /* | 1233 | /* |
754 | * Disable the event on the cpu that it's on | 1234 | * Disable the event on the cpu that it's on |
755 | */ | 1235 | */ |
756 | smp_call_function_single(event->cpu, __perf_event_disable, | 1236 | cpu_function_call(event->cpu, __perf_event_disable, event); |
757 | event, 1); | ||
758 | return; | 1237 | return; |
759 | } | 1238 | } |
760 | 1239 | ||
761 | retry: | 1240 | retry: |
762 | task_oncpu_function_call(task, __perf_event_disable, event); | 1241 | if (!task_function_call(task, __perf_event_disable, event)) |
1242 | return; | ||
763 | 1243 | ||
764 | raw_spin_lock_irq(&ctx->lock); | 1244 | raw_spin_lock_irq(&ctx->lock); |
765 | /* | 1245 | /* |
@@ -767,6 +1247,11 @@ retry: | |||
767 | */ | 1247 | */ |
768 | if (event->state == PERF_EVENT_STATE_ACTIVE) { | 1248 | if (event->state == PERF_EVENT_STATE_ACTIVE) { |
769 | raw_spin_unlock_irq(&ctx->lock); | 1249 | raw_spin_unlock_irq(&ctx->lock); |
1250 | /* | ||
1251 | * Reload the task pointer, it might have been changed by | ||
1252 | * a concurrent perf_event_context_sched_out(). | ||
1253 | */ | ||
1254 | task = ctx->task; | ||
770 | goto retry; | 1255 | goto retry; |
771 | } | 1256 | } |
772 | 1257 | ||
@@ -778,10 +1263,48 @@ retry: | |||
778 | update_group_times(event); | 1263 | update_group_times(event); |
779 | event->state = PERF_EVENT_STATE_OFF; | 1264 | event->state = PERF_EVENT_STATE_OFF; |
780 | } | 1265 | } |
781 | |||
782 | raw_spin_unlock_irq(&ctx->lock); | 1266 | raw_spin_unlock_irq(&ctx->lock); |
783 | } | 1267 | } |
784 | 1268 | ||
1269 | static void perf_set_shadow_time(struct perf_event *event, | ||
1270 | struct perf_event_context *ctx, | ||
1271 | u64 tstamp) | ||
1272 | { | ||
1273 | /* | ||
1274 | * use the correct time source for the time snapshot | ||
1275 | * | ||
1276 | * We could get by without this by leveraging the | ||
1277 | * fact that to get to this function, the caller | ||
1278 | * has most likely already called update_context_time() | ||
1279 | * and update_cgrp_time_xx() and thus both timestamps | ||
1280 | * are identical (or very close). Given that tstamp is | ||
1281 | * already adjusted for cgroup, we could say that: | ||
1282 | * tstamp - ctx->timestamp | ||
1283 | * is equivalent to | ||
1284 | * tstamp - cgrp->timestamp. | ||
1285 | * | ||
1286 | * Then, in perf_output_read(), the calculation would | ||
1287 | * work with no changes because: | ||
1288 | * - event is guaranteed scheduled in | ||
1289 | * - no scheduled out in between | ||
1290 | * - thus the timestamp would be the same | ||
1291 | * | ||
1292 | * But this is a bit hairy. | ||
1293 | * | ||
1294 | * So instead, we have an explicit cgroup call to remain | ||
1295 | * within the time source all along. We believe it | ||
1296 | * is cleaner and simpler to understand. | ||
1297 | */ | ||
1298 | if (is_cgroup_event(event)) | ||
1299 | perf_cgroup_set_shadow_time(event, tstamp); | ||
1300 | else | ||
1301 | event->shadow_ctx_time = tstamp - ctx->timestamp; | ||
1302 | } | ||
1303 | |||
1304 | #define MAX_INTERRUPTS (~0ULL) | ||
1305 | |||
1306 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
1307 | |||
785 | static int | 1308 | static int |
786 | event_sched_in(struct perf_event *event, | 1309 | event_sched_in(struct perf_event *event, |
787 | struct perf_cpu_context *cpuctx, | 1310 | struct perf_cpu_context *cpuctx, |
@@ -794,6 +1317,17 @@ event_sched_in(struct perf_event *event, | |||
794 | 1317 | ||
795 | event->state = PERF_EVENT_STATE_ACTIVE; | 1318 | event->state = PERF_EVENT_STATE_ACTIVE; |
796 | event->oncpu = smp_processor_id(); | 1319 | event->oncpu = smp_processor_id(); |
1320 | |||
1321 | /* | ||
1322 | * Unthrottle events, since we scheduled we might have missed several | ||
1323 | * ticks already, also for a heavily scheduling task there is little | ||
1324 | * guarantee it'll get a tick in a timely manner. | ||
1325 | */ | ||
1326 | if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { | ||
1327 | perf_log_throttle(event, 1); | ||
1328 | event->hw.interrupts = 0; | ||
1329 | } | ||
1330 | |||
797 | /* | 1331 | /* |
798 | * The new state must be visible before we turn it on in the hardware: | 1332 | * The new state must be visible before we turn it on in the hardware: |
799 | */ | 1333 | */ |
@@ -807,7 +1341,7 @@ event_sched_in(struct perf_event *event, | |||
807 | 1341 | ||
808 | event->tstamp_running += tstamp - event->tstamp_stopped; | 1342 | event->tstamp_running += tstamp - event->tstamp_stopped; |
809 | 1343 | ||
810 | event->shadow_ctx_time = tstamp - ctx->timestamp; | 1344 | perf_set_shadow_time(event, ctx, tstamp); |
811 | 1345 | ||
812 | if (!is_software_event(event)) | 1346 | if (!is_software_event(event)) |
813 | cpuctx->active_oncpu++; | 1347 | cpuctx->active_oncpu++; |
@@ -928,12 +1462,15 @@ static void add_event_to_ctx(struct perf_event *event, | |||
928 | event->tstamp_stopped = tstamp; | 1462 | event->tstamp_stopped = tstamp; |
929 | } | 1463 | } |
930 | 1464 | ||
1465 | static void perf_event_context_sched_in(struct perf_event_context *ctx, | ||
1466 | struct task_struct *tsk); | ||
1467 | |||
931 | /* | 1468 | /* |
932 | * Cross CPU call to install and enable a performance event | 1469 | * Cross CPU call to install and enable a performance event |
933 | * | 1470 | * |
934 | * Must be called with ctx->mutex held | 1471 | * Must be called with ctx->mutex held |
935 | */ | 1472 | */ |
936 | static void __perf_install_in_context(void *info) | 1473 | static int __perf_install_in_context(void *info) |
937 | { | 1474 | { |
938 | struct perf_event *event = info; | 1475 | struct perf_event *event = info; |
939 | struct perf_event_context *ctx = event->ctx; | 1476 | struct perf_event_context *ctx = event->ctx; |
@@ -942,21 +1479,22 @@ static void __perf_install_in_context(void *info) | |||
942 | int err; | 1479 | int err; |
943 | 1480 | ||
944 | /* | 1481 | /* |
945 | * If this is a task context, we need to check whether it is | 1482 | * In case we're installing a new context to an already running task, |
946 | * the current task context of this cpu. If not it has been | 1483 | * could also happen before perf_event_task_sched_in() on architectures |
947 | * scheduled out before the smp call arrived. | 1484 | * which do context switches with IRQs enabled. |
948 | * Or possibly this is the right context but it isn't | ||
949 | * on this cpu because it had no events. | ||
950 | */ | 1485 | */ |
951 | if (ctx->task && cpuctx->task_ctx != ctx) { | 1486 | if (ctx->task && !cpuctx->task_ctx) |
952 | if (cpuctx->task_ctx || ctx->task != current) | 1487 | perf_event_context_sched_in(ctx, ctx->task); |
953 | return; | ||
954 | cpuctx->task_ctx = ctx; | ||
955 | } | ||
956 | 1488 | ||
957 | raw_spin_lock(&ctx->lock); | 1489 | raw_spin_lock(&ctx->lock); |
958 | ctx->is_active = 1; | 1490 | ctx->is_active = 1; |
959 | update_context_time(ctx); | 1491 | update_context_time(ctx); |
1492 | /* | ||
1493 | * update cgrp time only if current cgrp | ||
1494 | * matches event->cgrp. Must be done before | ||
1495 | * calling add_event_to_ctx() | ||
1496 | */ | ||
1497 | update_cgrp_time_from_event(event); | ||
960 | 1498 | ||
961 | add_event_to_ctx(event, ctx); | 1499 | add_event_to_ctx(event, ctx); |
962 | 1500 | ||
@@ -997,6 +1535,8 @@ static void __perf_install_in_context(void *info) | |||
997 | 1535 | ||
998 | unlock: | 1536 | unlock: |
999 | raw_spin_unlock(&ctx->lock); | 1537 | raw_spin_unlock(&ctx->lock); |
1538 | |||
1539 | return 0; | ||
1000 | } | 1540 | } |
1001 | 1541 | ||
1002 | /* | 1542 | /* |
@@ -1008,8 +1548,6 @@ unlock: | |||
1008 | * If the event is attached to a task which is on a CPU we use a smp | 1548 | * If the event is attached to a task which is on a CPU we use a smp |
1009 | * call to enable it in the task context. The task might have been | 1549 | * call to enable it in the task context. The task might have been |
1010 | * scheduled away, but we check this in the smp call again. | 1550 | * scheduled away, but we check this in the smp call again. |
1011 | * | ||
1012 | * Must be called with ctx->mutex held. | ||
1013 | */ | 1551 | */ |
1014 | static void | 1552 | static void |
1015 | perf_install_in_context(struct perf_event_context *ctx, | 1553 | perf_install_in_context(struct perf_event_context *ctx, |
@@ -1018,6 +1556,8 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
1018 | { | 1556 | { |
1019 | struct task_struct *task = ctx->task; | 1557 | struct task_struct *task = ctx->task; |
1020 | 1558 | ||
1559 | lockdep_assert_held(&ctx->mutex); | ||
1560 | |||
1021 | event->ctx = ctx; | 1561 | event->ctx = ctx; |
1022 | 1562 | ||
1023 | if (!task) { | 1563 | if (!task) { |
@@ -1025,31 +1565,29 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
1025 | * Per cpu events are installed via an smp call and | 1565 | * Per cpu events are installed via an smp call and |
1026 | * the install is always successful. | 1566 | * the install is always successful. |
1027 | */ | 1567 | */ |
1028 | smp_call_function_single(cpu, __perf_install_in_context, | 1568 | cpu_function_call(cpu, __perf_install_in_context, event); |
1029 | event, 1); | ||
1030 | return; | 1569 | return; |
1031 | } | 1570 | } |
1032 | 1571 | ||
1033 | retry: | 1572 | retry: |
1034 | task_oncpu_function_call(task, __perf_install_in_context, | 1573 | if (!task_function_call(task, __perf_install_in_context, event)) |
1035 | event); | 1574 | return; |
1036 | 1575 | ||
1037 | raw_spin_lock_irq(&ctx->lock); | 1576 | raw_spin_lock_irq(&ctx->lock); |
1038 | /* | 1577 | /* |
1039 | * we need to retry the smp call. | 1578 | * If we failed to find a running task, but find the context active now |
1579 | * that we've acquired the ctx->lock, retry. | ||
1040 | */ | 1580 | */ |
1041 | if (ctx->is_active && list_empty(&event->group_entry)) { | 1581 | if (ctx->is_active) { |
1042 | raw_spin_unlock_irq(&ctx->lock); | 1582 | raw_spin_unlock_irq(&ctx->lock); |
1043 | goto retry; | 1583 | goto retry; |
1044 | } | 1584 | } |
1045 | 1585 | ||
1046 | /* | 1586 | /* |
1047 | * The lock prevents that this context is scheduled in so we | 1587 | * Since the task isn't running, it's safe to add the event, us holding |
1048 | * can add the event safely, if it the call above did not | 1588 | * the ctx->lock ensures the task won't get scheduled in. |
1049 | * succeed. | ||
1050 | */ | 1589 | */ |
1051 | if (list_empty(&event->group_entry)) | 1590 | add_event_to_ctx(event, ctx); |
1052 | add_event_to_ctx(event, ctx); | ||
1053 | raw_spin_unlock_irq(&ctx->lock); | 1591 | raw_spin_unlock_irq(&ctx->lock); |
1054 | } | 1592 | } |
1055 | 1593 | ||
@@ -1078,7 +1616,7 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
1078 | /* | 1616 | /* |
1079 | * Cross CPU call to enable a performance event | 1617 | * Cross CPU call to enable a performance event |
1080 | */ | 1618 | */ |
1081 | static void __perf_event_enable(void *info) | 1619 | static int __perf_event_enable(void *info) |
1082 | { | 1620 | { |
1083 | struct perf_event *event = info; | 1621 | struct perf_event *event = info; |
1084 | struct perf_event_context *ctx = event->ctx; | 1622 | struct perf_event_context *ctx = event->ctx; |
@@ -1086,26 +1624,27 @@ static void __perf_event_enable(void *info) | |||
1086 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1624 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1087 | int err; | 1625 | int err; |
1088 | 1626 | ||
1089 | /* | 1627 | if (WARN_ON_ONCE(!ctx->is_active)) |
1090 | * If this is a per-task event, need to check whether this | 1628 | return -EINVAL; |
1091 | * event's task is the current task on this cpu. | ||
1092 | */ | ||
1093 | if (ctx->task && cpuctx->task_ctx != ctx) { | ||
1094 | if (cpuctx->task_ctx || ctx->task != current) | ||
1095 | return; | ||
1096 | cpuctx->task_ctx = ctx; | ||
1097 | } | ||
1098 | 1629 | ||
1099 | raw_spin_lock(&ctx->lock); | 1630 | raw_spin_lock(&ctx->lock); |
1100 | ctx->is_active = 1; | ||
1101 | update_context_time(ctx); | 1631 | update_context_time(ctx); |
1102 | 1632 | ||
1103 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | 1633 | if (event->state >= PERF_EVENT_STATE_INACTIVE) |
1104 | goto unlock; | 1634 | goto unlock; |
1635 | |||
1636 | /* | ||
1637 | * set current task's cgroup time reference point | ||
1638 | */ | ||
1639 | perf_cgroup_set_timestamp(current, ctx); | ||
1640 | |||
1105 | __perf_event_mark_enabled(event, ctx); | 1641 | __perf_event_mark_enabled(event, ctx); |
1106 | 1642 | ||
1107 | if (!event_filter_match(event)) | 1643 | if (!event_filter_match(event)) { |
1644 | if (is_cgroup_event(event)) | ||
1645 | perf_cgroup_defer_enabled(event); | ||
1108 | goto unlock; | 1646 | goto unlock; |
1647 | } | ||
1109 | 1648 | ||
1110 | /* | 1649 | /* |
1111 | * If the event is in a group and isn't the group leader, | 1650 | * If the event is in a group and isn't the group leader, |
@@ -1138,6 +1677,8 @@ static void __perf_event_enable(void *info) | |||
1138 | 1677 | ||
1139 | unlock: | 1678 | unlock: |
1140 | raw_spin_unlock(&ctx->lock); | 1679 | raw_spin_unlock(&ctx->lock); |
1680 | |||
1681 | return 0; | ||
1141 | } | 1682 | } |
1142 | 1683 | ||
1143 | /* | 1684 | /* |
@@ -1158,8 +1699,7 @@ void perf_event_enable(struct perf_event *event) | |||
1158 | /* | 1699 | /* |
1159 | * Enable the event on the cpu that it's on | 1700 | * Enable the event on the cpu that it's on |
1160 | */ | 1701 | */ |
1161 | smp_call_function_single(event->cpu, __perf_event_enable, | 1702 | cpu_function_call(event->cpu, __perf_event_enable, event); |
1162 | event, 1); | ||
1163 | return; | 1703 | return; |
1164 | } | 1704 | } |
1165 | 1705 | ||
@@ -1178,8 +1718,15 @@ void perf_event_enable(struct perf_event *event) | |||
1178 | event->state = PERF_EVENT_STATE_OFF; | 1718 | event->state = PERF_EVENT_STATE_OFF; |
1179 | 1719 | ||
1180 | retry: | 1720 | retry: |
1721 | if (!ctx->is_active) { | ||
1722 | __perf_event_mark_enabled(event, ctx); | ||
1723 | goto out; | ||
1724 | } | ||
1725 | |||
1181 | raw_spin_unlock_irq(&ctx->lock); | 1726 | raw_spin_unlock_irq(&ctx->lock); |
1182 | task_oncpu_function_call(task, __perf_event_enable, event); | 1727 | |
1728 | if (!task_function_call(task, __perf_event_enable, event)) | ||
1729 | return; | ||
1183 | 1730 | ||
1184 | raw_spin_lock_irq(&ctx->lock); | 1731 | raw_spin_lock_irq(&ctx->lock); |
1185 | 1732 | ||
@@ -1187,15 +1734,14 @@ retry: | |||
1187 | * If the context is active and the event is still off, | 1734 | * If the context is active and the event is still off, |
1188 | * we need to retry the cross-call. | 1735 | * we need to retry the cross-call. |
1189 | */ | 1736 | */ |
1190 | if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) | 1737 | if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) { |
1738 | /* | ||
1739 | * task could have been flipped by a concurrent | ||
1740 | * perf_event_context_sched_out() | ||
1741 | */ | ||
1742 | task = ctx->task; | ||
1191 | goto retry; | 1743 | goto retry; |
1192 | 1744 | } | |
1193 | /* | ||
1194 | * Since we have the lock this context can't be scheduled | ||
1195 | * in, so we can change the state safely. | ||
1196 | */ | ||
1197 | if (event->state == PERF_EVENT_STATE_OFF) | ||
1198 | __perf_event_mark_enabled(event, ctx); | ||
1199 | 1745 | ||
1200 | out: | 1746 | out: |
1201 | raw_spin_unlock_irq(&ctx->lock); | 1747 | raw_spin_unlock_irq(&ctx->lock); |
@@ -1227,6 +1773,7 @@ static void ctx_sched_out(struct perf_event_context *ctx, | |||
1227 | if (likely(!ctx->nr_events)) | 1773 | if (likely(!ctx->nr_events)) |
1228 | goto out; | 1774 | goto out; |
1229 | update_context_time(ctx); | 1775 | update_context_time(ctx); |
1776 | update_cgrp_time_from_cpuctx(cpuctx); | ||
1230 | 1777 | ||
1231 | if (!ctx->nr_active) | 1778 | if (!ctx->nr_active) |
1232 | goto out; | 1779 | goto out; |
@@ -1339,8 +1886,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1339 | } | 1886 | } |
1340 | } | 1887 | } |
1341 | 1888 | ||
1342 | void perf_event_context_sched_out(struct task_struct *task, int ctxn, | 1889 | static void perf_event_context_sched_out(struct task_struct *task, int ctxn, |
1343 | struct task_struct *next) | 1890 | struct task_struct *next) |
1344 | { | 1891 | { |
1345 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; | 1892 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; |
1346 | struct perf_event_context *next_ctx; | 1893 | struct perf_event_context *next_ctx; |
@@ -1416,6 +1963,14 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1416 | 1963 | ||
1417 | for_each_task_context_nr(ctxn) | 1964 | for_each_task_context_nr(ctxn) |
1418 | perf_event_context_sched_out(task, ctxn, next); | 1965 | perf_event_context_sched_out(task, ctxn, next); |
1966 | |||
1967 | /* | ||
1968 | * if cgroup events exist on this CPU, then we need | ||
1969 | * to check if we have to switch out PMU state. | ||
1970 | * cgroup events are system-wide mode only | ||
1971 | */ | ||
1972 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | ||
1973 | perf_cgroup_sched_out(task); | ||
1419 | } | 1974 | } |
1420 | 1975 | ||
1421 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1976 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
@@ -1454,6 +2009,10 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, | |||
1454 | if (!event_filter_match(event)) | 2009 | if (!event_filter_match(event)) |
1455 | continue; | 2010 | continue; |
1456 | 2011 | ||
2012 | /* may need to reset tstamp_enabled */ | ||
2013 | if (is_cgroup_event(event)) | ||
2014 | perf_cgroup_mark_enabled(event, ctx); | ||
2015 | |||
1457 | if (group_can_go_on(event, cpuctx, 1)) | 2016 | if (group_can_go_on(event, cpuctx, 1)) |
1458 | group_sched_in(event, cpuctx, ctx); | 2017 | group_sched_in(event, cpuctx, ctx); |
1459 | 2018 | ||
@@ -1486,6 +2045,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1486 | if (!event_filter_match(event)) | 2045 | if (!event_filter_match(event)) |
1487 | continue; | 2046 | continue; |
1488 | 2047 | ||
2048 | /* may need to reset tstamp_enabled */ | ||
2049 | if (is_cgroup_event(event)) | ||
2050 | perf_cgroup_mark_enabled(event, ctx); | ||
2051 | |||
1489 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | 2052 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
1490 | if (group_sched_in(event, cpuctx, ctx)) | 2053 | if (group_sched_in(event, cpuctx, ctx)) |
1491 | can_add_hw = 0; | 2054 | can_add_hw = 0; |
@@ -1496,15 +2059,19 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1496 | static void | 2059 | static void |
1497 | ctx_sched_in(struct perf_event_context *ctx, | 2060 | ctx_sched_in(struct perf_event_context *ctx, |
1498 | struct perf_cpu_context *cpuctx, | 2061 | struct perf_cpu_context *cpuctx, |
1499 | enum event_type_t event_type) | 2062 | enum event_type_t event_type, |
2063 | struct task_struct *task) | ||
1500 | { | 2064 | { |
2065 | u64 now; | ||
2066 | |||
1501 | raw_spin_lock(&ctx->lock); | 2067 | raw_spin_lock(&ctx->lock); |
1502 | ctx->is_active = 1; | 2068 | ctx->is_active = 1; |
1503 | if (likely(!ctx->nr_events)) | 2069 | if (likely(!ctx->nr_events)) |
1504 | goto out; | 2070 | goto out; |
1505 | 2071 | ||
1506 | ctx->timestamp = perf_clock(); | 2072 | now = perf_clock(); |
1507 | 2073 | ctx->timestamp = now; | |
2074 | perf_cgroup_set_timestamp(task, ctx); | ||
1508 | /* | 2075 | /* |
1509 | * First go through the list and put on any pinned groups | 2076 | * First go through the list and put on any pinned groups |
1510 | * in order to give them the best chance of going on. | 2077 | * in order to give them the best chance of going on. |
@@ -1521,11 +2088,12 @@ out: | |||
1521 | } | 2088 | } |
1522 | 2089 | ||
1523 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | 2090 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, |
1524 | enum event_type_t event_type) | 2091 | enum event_type_t event_type, |
2092 | struct task_struct *task) | ||
1525 | { | 2093 | { |
1526 | struct perf_event_context *ctx = &cpuctx->ctx; | 2094 | struct perf_event_context *ctx = &cpuctx->ctx; |
1527 | 2095 | ||
1528 | ctx_sched_in(ctx, cpuctx, event_type); | 2096 | ctx_sched_in(ctx, cpuctx, event_type, task); |
1529 | } | 2097 | } |
1530 | 2098 | ||
1531 | static void task_ctx_sched_in(struct perf_event_context *ctx, | 2099 | static void task_ctx_sched_in(struct perf_event_context *ctx, |
@@ -1533,15 +2101,16 @@ static void task_ctx_sched_in(struct perf_event_context *ctx, | |||
1533 | { | 2101 | { |
1534 | struct perf_cpu_context *cpuctx; | 2102 | struct perf_cpu_context *cpuctx; |
1535 | 2103 | ||
1536 | cpuctx = __get_cpu_context(ctx); | 2104 | cpuctx = __get_cpu_context(ctx); |
1537 | if (cpuctx->task_ctx == ctx) | 2105 | if (cpuctx->task_ctx == ctx) |
1538 | return; | 2106 | return; |
1539 | 2107 | ||
1540 | ctx_sched_in(ctx, cpuctx, event_type); | 2108 | ctx_sched_in(ctx, cpuctx, event_type, NULL); |
1541 | cpuctx->task_ctx = ctx; | 2109 | cpuctx->task_ctx = ctx; |
1542 | } | 2110 | } |
1543 | 2111 | ||
1544 | void perf_event_context_sched_in(struct perf_event_context *ctx) | 2112 | static void perf_event_context_sched_in(struct perf_event_context *ctx, |
2113 | struct task_struct *task) | ||
1545 | { | 2114 | { |
1546 | struct perf_cpu_context *cpuctx; | 2115 | struct perf_cpu_context *cpuctx; |
1547 | 2116 | ||
@@ -1557,9 +2126,9 @@ void perf_event_context_sched_in(struct perf_event_context *ctx) | |||
1557 | */ | 2126 | */ |
1558 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 2127 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
1559 | 2128 | ||
1560 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED); | 2129 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); |
1561 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 2130 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); |
1562 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); | 2131 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); |
1563 | 2132 | ||
1564 | cpuctx->task_ctx = ctx; | 2133 | cpuctx->task_ctx = ctx; |
1565 | 2134 | ||
@@ -1592,14 +2161,17 @@ void __perf_event_task_sched_in(struct task_struct *task) | |||
1592 | if (likely(!ctx)) | 2161 | if (likely(!ctx)) |
1593 | continue; | 2162 | continue; |
1594 | 2163 | ||
1595 | perf_event_context_sched_in(ctx); | 2164 | perf_event_context_sched_in(ctx, task); |
1596 | } | 2165 | } |
2166 | /* | ||
2167 | * if cgroup events exist on this CPU, then we need | ||
2168 | * to check if we have to switch in PMU state. | ||
2169 | * cgroup events are system-wide mode only | ||
2170 | */ | ||
2171 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | ||
2172 | perf_cgroup_sched_in(task); | ||
1597 | } | 2173 | } |
1598 | 2174 | ||
1599 | #define MAX_INTERRUPTS (~0ULL) | ||
1600 | |||
1601 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
1602 | |||
1603 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2175 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
1604 | { | 2176 | { |
1605 | u64 frequency = event->attr.sample_freq; | 2177 | u64 frequency = event->attr.sample_freq; |
@@ -1627,7 +2199,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | |||
1627 | * Reduce accuracy by one bit such that @a and @b converge | 2199 | * Reduce accuracy by one bit such that @a and @b converge |
1628 | * to a similar magnitude. | 2200 | * to a similar magnitude. |
1629 | */ | 2201 | */ |
1630 | #define REDUCE_FLS(a, b) \ | 2202 | #define REDUCE_FLS(a, b) \ |
1631 | do { \ | 2203 | do { \ |
1632 | if (a##_fls > b##_fls) { \ | 2204 | if (a##_fls > b##_fls) { \ |
1633 | a >>= 1; \ | 2205 | a >>= 1; \ |
@@ -1797,7 +2369,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
1797 | if (ctx) | 2369 | if (ctx) |
1798 | rotate_ctx(ctx); | 2370 | rotate_ctx(ctx); |
1799 | 2371 | ||
1800 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 2372 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); |
1801 | if (ctx) | 2373 | if (ctx) |
1802 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); | 2374 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); |
1803 | 2375 | ||
@@ -1852,6 +2424,14 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
1852 | if (!ctx || !ctx->nr_events) | 2424 | if (!ctx || !ctx->nr_events) |
1853 | goto out; | 2425 | goto out; |
1854 | 2426 | ||
2427 | /* | ||
2428 | * We must ctxsw out cgroup events to avoid conflict | ||
2429 | * when invoking perf_task_event_sched_in() later on | ||
2430 | * in this function. Otherwise we end up trying to | ||
2431 | * ctxswin cgroup events which are already scheduled | ||
2432 | * in. | ||
2433 | */ | ||
2434 | perf_cgroup_sched_out(current); | ||
1855 | task_ctx_sched_out(ctx, EVENT_ALL); | 2435 | task_ctx_sched_out(ctx, EVENT_ALL); |
1856 | 2436 | ||
1857 | raw_spin_lock(&ctx->lock); | 2437 | raw_spin_lock(&ctx->lock); |
@@ -1876,7 +2456,10 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
1876 | 2456 | ||
1877 | raw_spin_unlock(&ctx->lock); | 2457 | raw_spin_unlock(&ctx->lock); |
1878 | 2458 | ||
1879 | perf_event_context_sched_in(ctx); | 2459 | /* |
2460 | * Also calls ctxswin for cgroup events, if any: | ||
2461 | */ | ||
2462 | perf_event_context_sched_in(ctx, ctx->task); | ||
1880 | out: | 2463 | out: |
1881 | local_irq_restore(flags); | 2464 | local_irq_restore(flags); |
1882 | } | 2465 | } |
@@ -1901,8 +2484,10 @@ static void __perf_event_read(void *info) | |||
1901 | return; | 2484 | return; |
1902 | 2485 | ||
1903 | raw_spin_lock(&ctx->lock); | 2486 | raw_spin_lock(&ctx->lock); |
1904 | if (ctx->is_active) | 2487 | if (ctx->is_active) { |
1905 | update_context_time(ctx); | 2488 | update_context_time(ctx); |
2489 | update_cgrp_time_from_event(event); | ||
2490 | } | ||
1906 | update_event_times(event); | 2491 | update_event_times(event); |
1907 | if (event->state == PERF_EVENT_STATE_ACTIVE) | 2492 | if (event->state == PERF_EVENT_STATE_ACTIVE) |
1908 | event->pmu->read(event); | 2493 | event->pmu->read(event); |
@@ -1933,8 +2518,10 @@ static u64 perf_event_read(struct perf_event *event) | |||
1933 | * (e.g., thread is blocked), in that case | 2518 | * (e.g., thread is blocked), in that case |
1934 | * we cannot update context time | 2519 | * we cannot update context time |
1935 | */ | 2520 | */ |
1936 | if (ctx->is_active) | 2521 | if (ctx->is_active) { |
1937 | update_context_time(ctx); | 2522 | update_context_time(ctx); |
2523 | update_cgrp_time_from_event(event); | ||
2524 | } | ||
1938 | update_event_times(event); | 2525 | update_event_times(event); |
1939 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2526 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
1940 | } | 2527 | } |
@@ -2213,6 +2800,9 @@ errout: | |||
2213 | 2800 | ||
2214 | } | 2801 | } |
2215 | 2802 | ||
2803 | /* | ||
2804 | * Returns a matching context with refcount and pincount. | ||
2805 | */ | ||
2216 | static struct perf_event_context * | 2806 | static struct perf_event_context * |
2217 | find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | 2807 | find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) |
2218 | { | 2808 | { |
@@ -2237,6 +2827,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | |||
2237 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 2827 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
2238 | ctx = &cpuctx->ctx; | 2828 | ctx = &cpuctx->ctx; |
2239 | get_ctx(ctx); | 2829 | get_ctx(ctx); |
2830 | ++ctx->pin_count; | ||
2240 | 2831 | ||
2241 | return ctx; | 2832 | return ctx; |
2242 | } | 2833 | } |
@@ -2250,6 +2841,7 @@ retry: | |||
2250 | ctx = perf_lock_task_context(task, ctxn, &flags); | 2841 | ctx = perf_lock_task_context(task, ctxn, &flags); |
2251 | if (ctx) { | 2842 | if (ctx) { |
2252 | unclone_ctx(ctx); | 2843 | unclone_ctx(ctx); |
2844 | ++ctx->pin_count; | ||
2253 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2845 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
2254 | } | 2846 | } |
2255 | 2847 | ||
@@ -2271,8 +2863,10 @@ retry: | |||
2271 | err = -ESRCH; | 2863 | err = -ESRCH; |
2272 | else if (task->perf_event_ctxp[ctxn]) | 2864 | else if (task->perf_event_ctxp[ctxn]) |
2273 | err = -EAGAIN; | 2865 | err = -EAGAIN; |
2274 | else | 2866 | else { |
2867 | ++ctx->pin_count; | ||
2275 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | 2868 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); |
2869 | } | ||
2276 | mutex_unlock(&task->perf_event_mutex); | 2870 | mutex_unlock(&task->perf_event_mutex); |
2277 | 2871 | ||
2278 | if (unlikely(err)) { | 2872 | if (unlikely(err)) { |
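The pin_count increments added to find_get_context() above pair with the perf_unpin_context() calls introduced later in this patch in perf_event_open() and perf_event_create_kernel_counter(). A condensed sketch of the intended caller pattern, with the error handling of the real syscall path trimmed down for illustration:

	struct perf_event_context *ctx;

	ctx = find_get_context(pmu, task, event->cpu);	/* takes a reference and a pin */
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	perf_install_in_context(ctx, event, cpu);
	++ctx->generation;
	perf_unpin_context(ctx);			/* drop the pin, keep the reference */
	mutex_unlock(&ctx->mutex);
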
@@ -2312,7 +2906,7 @@ static void free_event(struct perf_event *event) | |||
2312 | 2906 | ||
2313 | if (!event->parent) { | 2907 | if (!event->parent) { |
2314 | if (event->attach_state & PERF_ATTACH_TASK) | 2908 | if (event->attach_state & PERF_ATTACH_TASK) |
2315 | jump_label_dec(&perf_task_events); | 2909 | jump_label_dec(&perf_sched_events); |
2316 | if (event->attr.mmap || event->attr.mmap_data) | 2910 | if (event->attr.mmap || event->attr.mmap_data) |
2317 | atomic_dec(&nr_mmap_events); | 2911 | atomic_dec(&nr_mmap_events); |
2318 | if (event->attr.comm) | 2912 | if (event->attr.comm) |
@@ -2321,6 +2915,10 @@ static void free_event(struct perf_event *event) | |||
2321 | atomic_dec(&nr_task_events); | 2915 | atomic_dec(&nr_task_events); |
2322 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) | 2916 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) |
2323 | put_callchain_buffers(); | 2917 | put_callchain_buffers(); |
2918 | if (is_cgroup_event(event)) { | ||
2919 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | ||
2920 | jump_label_dec(&perf_sched_events); | ||
2921 | } | ||
2324 | } | 2922 | } |
2325 | 2923 | ||
2326 | if (event->buffer) { | 2924 | if (event->buffer) { |
@@ -2328,6 +2926,9 @@ static void free_event(struct perf_event *event) | |||
2328 | event->buffer = NULL; | 2926 | event->buffer = NULL; |
2329 | } | 2927 | } |
2330 | 2928 | ||
2929 | if (is_cgroup_event(event)) | ||
2930 | perf_detach_cgroup(event); | ||
2931 | |||
2331 | if (event->destroy) | 2932 | if (event->destroy) |
2332 | event->destroy(event); | 2933 | event->destroy(event); |
2333 | 2934 | ||
@@ -4395,26 +4996,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
4395 | if (unlikely(!is_sampling_event(event))) | 4996 | if (unlikely(!is_sampling_event(event))) |
4396 | return 0; | 4997 | return 0; |
4397 | 4998 | ||
4398 | if (!throttle) { | 4999 | if (unlikely(hwc->interrupts >= max_samples_per_tick)) { |
4399 | hwc->interrupts++; | 5000 | if (throttle) { |
4400 | } else { | 5001 | hwc->interrupts = MAX_INTERRUPTS; |
4401 | if (hwc->interrupts != MAX_INTERRUPTS) { | 5002 | perf_log_throttle(event, 0); |
4402 | hwc->interrupts++; | ||
4403 | if (HZ * hwc->interrupts > | ||
4404 | (u64)sysctl_perf_event_sample_rate) { | ||
4405 | hwc->interrupts = MAX_INTERRUPTS; | ||
4406 | perf_log_throttle(event, 0); | ||
4407 | ret = 1; | ||
4408 | } | ||
4409 | } else { | ||
4410 | /* | ||
4411 | * Keep re-disabling events even though on the previous | ||
4412 | * pass we disabled it - just in case we raced with a | ||
4413 | * sched-in and the event got enabled again: | ||
4414 | */ | ||
4415 | ret = 1; | 5003 | ret = 1; |
4416 | } | 5004 | } |
4417 | } | 5005 | } else |
5006 | hwc->interrupts++; | ||
4418 | 5007 | ||
4419 | if (event->attr.freq) { | 5008 | if (event->attr.freq) { |
4420 | u64 now = perf_clock(); | 5009 | u64 now = perf_clock(); |
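The rewritten check above throttles an event once it takes max_samples_per_tick overflow interrupts within one timer tick, instead of re-deriving the limit from HZ * hwc->interrupts against sysctl_perf_event_sample_rate on every interrupt. As a rough worked example (the values are assumptions, not part of this hunk): with a sample-rate sysctl of 100000 and HZ = 1000, max_samples_per_tick works out to 100, so after 100 samples in a tick the next overflow sets hwc->interrupts to MAX_INTERRUPTS, logs a throttle record and returns 1 so the caller stops the event until the next tick unthrottles it.
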
@@ -4556,7 +5145,7 @@ static int perf_exclude_event(struct perf_event *event, | |||
4556 | struct pt_regs *regs) | 5145 | struct pt_regs *regs) |
4557 | { | 5146 | { |
4558 | if (event->hw.state & PERF_HES_STOPPED) | 5147 | if (event->hw.state & PERF_HES_STOPPED) |
4559 | return 0; | 5148 | return 1; |
4560 | 5149 | ||
4561 | if (regs) { | 5150 | if (regs) { |
4562 | if (event->attr.exclude_user && user_mode(regs)) | 5151 | if (event->attr.exclude_user && user_mode(regs)) |
@@ -4912,6 +5501,8 @@ static int perf_tp_event_match(struct perf_event *event, | |||
4912 | struct perf_sample_data *data, | 5501 | struct perf_sample_data *data, |
4913 | struct pt_regs *regs) | 5502 | struct pt_regs *regs) |
4914 | { | 5503 | { |
5504 | if (event->hw.state & PERF_HES_STOPPED) | ||
5505 | return 0; | ||
4915 | /* | 5506 | /* |
4916 | * All tracepoints are from kernel-space. | 5507 | * All tracepoints are from kernel-space. |
4917 | */ | 5508 | */ |
@@ -5051,6 +5642,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
5051 | u64 period; | 5642 | u64 period; |
5052 | 5643 | ||
5053 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); | 5644 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); |
5645 | |||
5646 | if (event->state != PERF_EVENT_STATE_ACTIVE) | ||
5647 | return HRTIMER_NORESTART; | ||
5648 | |||
5054 | event->pmu->read(event); | 5649 | event->pmu->read(event); |
5055 | 5650 | ||
5056 | perf_sample_data_init(&data, 0); | 5651 | perf_sample_data_init(&data, 0); |
@@ -5077,9 +5672,6 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) | |||
5077 | if (!is_sampling_event(event)) | 5672 | if (!is_sampling_event(event)) |
5078 | return; | 5673 | return; |
5079 | 5674 | ||
5080 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
5081 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
5082 | |||
5083 | period = local64_read(&hwc->period_left); | 5675 | period = local64_read(&hwc->period_left); |
5084 | if (period) { | 5676 | if (period) { |
5085 | if (period < 0) | 5677 | if (period < 0) |
@@ -5106,6 +5698,30 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) | |||
5106 | } | 5698 | } |
5107 | } | 5699 | } |
5108 | 5700 | ||
5701 | static void perf_swevent_init_hrtimer(struct perf_event *event) | ||
5702 | { | ||
5703 | struct hw_perf_event *hwc = &event->hw; | ||
5704 | |||
5705 | if (!is_sampling_event(event)) | ||
5706 | return; | ||
5707 | |||
5708 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
5709 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
5710 | |||
5711 | /* | ||
5712 | * Since hrtimers have a fixed rate, we can do a static freq->period | ||
5713 | * mapping and avoid the whole period adjust feedback stuff. | ||
5714 | */ | ||
5715 | if (event->attr.freq) { | ||
5716 | long freq = event->attr.sample_freq; | ||
5717 | |||
5718 | event->attr.sample_period = NSEC_PER_SEC / freq; | ||
5719 | hwc->sample_period = event->attr.sample_period; | ||
5720 | local64_set(&hwc->period_left, hwc->sample_period); | ||
5721 | event->attr.freq = 0; | ||
5722 | } | ||
5723 | } | ||
5724 | |||
5109 | /* | 5725 | /* |
5110 | * Software event: cpu wall time clock | 5726 | * Software event: cpu wall time clock |
5111 | */ | 5727 | */ |
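The static freq->period mapping in perf_swevent_init_hrtimer() above is a one-off calculation. A worked example (the frequency value is illustrative, not from the patch): with attr.sample_freq = 4000,

	sample_period = NSEC_PER_SEC / 4000 = 1000000000 / 4000 = 250000 ns

so the hrtimer is armed with a fixed 250 us period and attr.freq is cleared, keeping the event out of the adaptive period adjustment that perf_calculate_period() above feeds.
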
@@ -5158,6 +5774,8 @@ static int cpu_clock_event_init(struct perf_event *event) | |||
5158 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | 5774 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) |
5159 | return -ENOENT; | 5775 | return -ENOENT; |
5160 | 5776 | ||
5777 | perf_swevent_init_hrtimer(event); | ||
5778 | |||
5161 | return 0; | 5779 | return 0; |
5162 | } | 5780 | } |
5163 | 5781 | ||
@@ -5213,16 +5831,9 @@ static void task_clock_event_del(struct perf_event *event, int flags) | |||
5213 | 5831 | ||
5214 | static void task_clock_event_read(struct perf_event *event) | 5832 | static void task_clock_event_read(struct perf_event *event) |
5215 | { | 5833 | { |
5216 | u64 time; | 5834 | u64 now = perf_clock(); |
5217 | 5835 | u64 delta = now - event->ctx->timestamp; | |
5218 | if (!in_nmi()) { | 5836 | u64 time = event->ctx->time + delta; |
5219 | update_context_time(event->ctx); | ||
5220 | time = event->ctx->time; | ||
5221 | } else { | ||
5222 | u64 now = perf_clock(); | ||
5223 | u64 delta = now - event->ctx->timestamp; | ||
5224 | time = event->ctx->time + delta; | ||
5225 | } | ||
5226 | 5837 | ||
5227 | task_clock_event_update(event, time); | 5838 | task_clock_event_update(event, time); |
5228 | } | 5839 | } |
@@ -5235,6 +5846,8 @@ static int task_clock_event_init(struct perf_event *event) | |||
5235 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | 5846 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) |
5236 | return -ENOENT; | 5847 | return -ENOENT; |
5237 | 5848 | ||
5849 | perf_swevent_init_hrtimer(event); | ||
5850 | |||
5238 | return 0; | 5851 | return 0; |
5239 | } | 5852 | } |
5240 | 5853 | ||
@@ -5506,17 +6119,22 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
5506 | { | 6119 | { |
5507 | struct pmu *pmu = NULL; | 6120 | struct pmu *pmu = NULL; |
5508 | int idx; | 6121 | int idx; |
6122 | int ret; | ||
5509 | 6123 | ||
5510 | idx = srcu_read_lock(&pmus_srcu); | 6124 | idx = srcu_read_lock(&pmus_srcu); |
5511 | 6125 | ||
5512 | rcu_read_lock(); | 6126 | rcu_read_lock(); |
5513 | pmu = idr_find(&pmu_idr, event->attr.type); | 6127 | pmu = idr_find(&pmu_idr, event->attr.type); |
5514 | rcu_read_unlock(); | 6128 | rcu_read_unlock(); |
5515 | if (pmu) | 6129 | if (pmu) { |
6130 | ret = pmu->event_init(event); | ||
6131 | if (ret) | ||
6132 | pmu = ERR_PTR(ret); | ||
5516 | goto unlock; | 6133 | goto unlock; |
6134 | } | ||
5517 | 6135 | ||
5518 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 6136 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
5519 | int ret = pmu->event_init(event); | 6137 | ret = pmu->event_init(event); |
5520 | if (!ret) | 6138 | if (!ret) |
5521 | goto unlock; | 6139 | goto unlock; |
5522 | 6140 | ||
@@ -5642,7 +6260,7 @@ done: | |||
5642 | 6260 | ||
5643 | if (!event->parent) { | 6261 | if (!event->parent) { |
5644 | if (event->attach_state & PERF_ATTACH_TASK) | 6262 | if (event->attach_state & PERF_ATTACH_TASK) |
5645 | jump_label_inc(&perf_task_events); | 6263 | jump_label_inc(&perf_sched_events); |
5646 | if (event->attr.mmap || event->attr.mmap_data) | 6264 | if (event->attr.mmap || event->attr.mmap_data) |
5647 | atomic_inc(&nr_mmap_events); | 6265 | atomic_inc(&nr_mmap_events); |
5648 | if (event->attr.comm) | 6266 | if (event->attr.comm) |
@@ -5817,7 +6435,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5817 | int err; | 6435 | int err; |
5818 | 6436 | ||
5819 | /* for future expandability... */ | 6437 | /* for future expandability... */ |
5820 | if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) | 6438 | if (flags & ~PERF_FLAG_ALL) |
5821 | return -EINVAL; | 6439 | return -EINVAL; |
5822 | 6440 | ||
5823 | err = perf_copy_attr(attr_uptr, &attr); | 6441 | err = perf_copy_attr(attr_uptr, &attr); |
@@ -5834,6 +6452,15 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5834 | return -EINVAL; | 6452 | return -EINVAL; |
5835 | } | 6453 | } |
5836 | 6454 | ||
6455 | /* | ||
6456 | * In cgroup mode, the pid argument is used to pass the fd | ||
6457 | * opened to the cgroup directory in cgroupfs. The cpu argument | ||
6458 | * designates the cpu on which to monitor threads from that | ||
6459 | * cgroup. | ||
6460 | */ | ||
6461 | if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) | ||
6462 | return -EINVAL; | ||
6463 | |||
5837 | event_fd = get_unused_fd_flags(O_RDWR); | 6464 | event_fd = get_unused_fd_flags(O_RDWR); |
5838 | if (event_fd < 0) | 6465 | if (event_fd < 0) |
5839 | return event_fd; | 6466 | return event_fd; |
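For context, a minimal user-space sketch of the cgroup mode that the comment and the pid/cpu check above describe; the cgroupfs path, group name and event choice below are assumptions for illustration, not part of the patch:

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	/* count cycles for every task in one cgroup, on one cpu */
	static int open_cgroup_counter(const char *cgrp_dir, int cpu)
	{
		struct perf_event_attr attr = {
			.type	= PERF_TYPE_HARDWARE,
			.config	= PERF_COUNT_HW_CPU_CYCLES,
			.size	= sizeof(attr),
		};
		int cgrp_fd = open(cgrp_dir, O_RDONLY);	/* the pid argument carries this fd */

		if (cgrp_fd < 0)
			return -1;
		/* cpu must be >= 0 here, as enforced by the check added above */
		return syscall(__NR_perf_event_open, &attr, cgrp_fd, cpu,
			       -1 /* group_fd */, PERF_FLAG_PID_CGROUP);
	}

For example, open_cgroup_counter("/sys/fs/cgroup/perf_event/mygrp", 0) would monitor that group's threads on cpu 0 (the mount point is an assumption).
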
@@ -5851,7 +6478,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5851 | group_leader = NULL; | 6478 | group_leader = NULL; |
5852 | } | 6479 | } |
5853 | 6480 | ||
5854 | if (pid != -1) { | 6481 | if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) { |
5855 | task = find_lively_task_by_vpid(pid); | 6482 | task = find_lively_task_by_vpid(pid); |
5856 | if (IS_ERR(task)) { | 6483 | if (IS_ERR(task)) { |
5857 | err = PTR_ERR(task); | 6484 | err = PTR_ERR(task); |
@@ -5865,6 +6492,19 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5865 | goto err_task; | 6492 | goto err_task; |
5866 | } | 6493 | } |
5867 | 6494 | ||
6495 | if (flags & PERF_FLAG_PID_CGROUP) { | ||
6496 | err = perf_cgroup_connect(pid, event, &attr, group_leader); | ||
6497 | if (err) | ||
6498 | goto err_alloc; | ||
6499 | /* | ||
6500 | * one more event: | ||
6501 | * - that has cgroup constraint on event->cpu | ||
6502 | * - that may need work on context switch | ||
6503 | */ | ||
6504 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); | ||
6505 | jump_label_inc(&perf_sched_events); | ||
6506 | } | ||
6507 | |||
5868 | /* | 6508 | /* |
5869 | * Special case software events and allow them to be part of | 6509 | * Special case software events and allow them to be part of |
5870 | * any hardware group. | 6510 | * any hardware group. |
@@ -5903,6 +6543,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5903 | goto err_alloc; | 6543 | goto err_alloc; |
5904 | } | 6544 | } |
5905 | 6545 | ||
6546 | if (task) { | ||
6547 | put_task_struct(task); | ||
6548 | task = NULL; | ||
6549 | } | ||
6550 | |||
5906 | /* | 6551 | /* |
5907 | * Look up the group leader (we will attach this event to it): | 6552 | * Look up the group leader (we will attach this event to it): |
5908 | */ | 6553 | */ |
@@ -5950,10 +6595,10 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5950 | struct perf_event_context *gctx = group_leader->ctx; | 6595 | struct perf_event_context *gctx = group_leader->ctx; |
5951 | 6596 | ||
5952 | mutex_lock(&gctx->mutex); | 6597 | mutex_lock(&gctx->mutex); |
5953 | perf_event_remove_from_context(group_leader); | 6598 | perf_remove_from_context(group_leader); |
5954 | list_for_each_entry(sibling, &group_leader->sibling_list, | 6599 | list_for_each_entry(sibling, &group_leader->sibling_list, |
5955 | group_entry) { | 6600 | group_entry) { |
5956 | perf_event_remove_from_context(sibling); | 6601 | perf_remove_from_context(sibling); |
5957 | put_ctx(gctx); | 6602 | put_ctx(gctx); |
5958 | } | 6603 | } |
5959 | mutex_unlock(&gctx->mutex); | 6604 | mutex_unlock(&gctx->mutex); |
@@ -5976,6 +6621,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5976 | 6621 | ||
5977 | perf_install_in_context(ctx, event, cpu); | 6622 | perf_install_in_context(ctx, event, cpu); |
5978 | ++ctx->generation; | 6623 | ++ctx->generation; |
6624 | perf_unpin_context(ctx); | ||
5979 | mutex_unlock(&ctx->mutex); | 6625 | mutex_unlock(&ctx->mutex); |
5980 | 6626 | ||
5981 | event->owner = current; | 6627 | event->owner = current; |
@@ -6001,6 +6647,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6001 | return event_fd; | 6647 | return event_fd; |
6002 | 6648 | ||
6003 | err_context: | 6649 | err_context: |
6650 | perf_unpin_context(ctx); | ||
6004 | put_ctx(ctx); | 6651 | put_ctx(ctx); |
6005 | err_alloc: | 6652 | err_alloc: |
6006 | free_event(event); | 6653 | free_event(event); |
@@ -6051,6 +6698,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
6051 | mutex_lock(&ctx->mutex); | 6698 | mutex_lock(&ctx->mutex); |
6052 | perf_install_in_context(ctx, event, cpu); | 6699 | perf_install_in_context(ctx, event, cpu); |
6053 | ++ctx->generation; | 6700 | ++ctx->generation; |
6701 | perf_unpin_context(ctx); | ||
6054 | mutex_unlock(&ctx->mutex); | 6702 | mutex_unlock(&ctx->mutex); |
6055 | 6703 | ||
6056 | return event; | 6704 | return event; |
@@ -6102,17 +6750,20 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
6102 | struct perf_event_context *child_ctx, | 6750 | struct perf_event_context *child_ctx, |
6103 | struct task_struct *child) | 6751 | struct task_struct *child) |
6104 | { | 6752 | { |
6105 | struct perf_event *parent_event; | 6753 | if (child_event->parent) { |
6754 | raw_spin_lock_irq(&child_ctx->lock); | ||
6755 | perf_group_detach(child_event); | ||
6756 | raw_spin_unlock_irq(&child_ctx->lock); | ||
6757 | } | ||
6106 | 6758 | ||
6107 | perf_event_remove_from_context(child_event); | 6759 | perf_remove_from_context(child_event); |
6108 | 6760 | ||
6109 | parent_event = child_event->parent; | ||
6110 | /* | 6761 | /* |
6111 | * It can happen that parent exits first, and has events | 6762 | * It can happen that the parent exits first, and has events |
6112 | * that are still around due to the child reference. These | 6763 | * that are still around due to the child reference. These |
6113 | * events need to be zapped - but otherwise linger. | 6764 | * events need to be zapped. |
6114 | */ | 6765 | */ |
6115 | if (parent_event) { | 6766 | if (child_event->parent) { |
6116 | sync_child_event(child_event, child); | 6767 | sync_child_event(child_event, child); |
6117 | free_event(child_event); | 6768 | free_event(child_event); |
6118 | } | 6769 | } |
@@ -6411,7 +7062,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
6411 | return 0; | 7062 | return 0; |
6412 | } | 7063 | } |
6413 | 7064 | ||
6414 | child_ctx = child->perf_event_ctxp[ctxn]; | 7065 | child_ctx = child->perf_event_ctxp[ctxn]; |
6415 | if (!child_ctx) { | 7066 | if (!child_ctx) { |
6416 | /* | 7067 | /* |
6417 | * This is executed from the parent task context, so | 7068 | * This is executed from the parent task context, so |
@@ -6526,6 +7177,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6526 | mutex_unlock(&parent_ctx->mutex); | 7177 | mutex_unlock(&parent_ctx->mutex); |
6527 | 7178 | ||
6528 | perf_unpin_context(parent_ctx); | 7179 | perf_unpin_context(parent_ctx); |
7180 | put_ctx(parent_ctx); | ||
6529 | 7181 | ||
6530 | return ret; | 7182 | return ret; |
6531 | } | 7183 | } |
@@ -6595,9 +7247,9 @@ static void __perf_event_exit_context(void *__info) | |||
6595 | perf_pmu_rotate_stop(ctx->pmu); | 7247 | perf_pmu_rotate_stop(ctx->pmu); |
6596 | 7248 | ||
6597 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 7249 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
6598 | __perf_event_remove_from_context(event); | 7250 | __perf_remove_from_context(event); |
6599 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | 7251 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) |
6600 | __perf_event_remove_from_context(event); | 7252 | __perf_remove_from_context(event); |
6601 | } | 7253 | } |
6602 | 7254 | ||
6603 | static void perf_event_exit_cpu_context(int cpu) | 7255 | static void perf_event_exit_cpu_context(int cpu) |
@@ -6721,3 +7373,83 @@ unlock: | |||
6721 | return ret; | 7373 | return ret; |
6722 | } | 7374 | } |
6723 | device_initcall(perf_event_sysfs_init); | 7375 | device_initcall(perf_event_sysfs_init); |
7376 | |||
7377 | #ifdef CONFIG_CGROUP_PERF | ||
7378 | static struct cgroup_subsys_state *perf_cgroup_create( | ||
7379 | struct cgroup_subsys *ss, struct cgroup *cont) | ||
7380 | { | ||
7381 | struct perf_cgroup *jc; | ||
7382 | |||
7383 | jc = kzalloc(sizeof(*jc), GFP_KERNEL); | ||
7384 | if (!jc) | ||
7385 | return ERR_PTR(-ENOMEM); | ||
7386 | |||
7387 | jc->info = alloc_percpu(struct perf_cgroup_info); | ||
7388 | if (!jc->info) { | ||
7389 | kfree(jc); | ||
7390 | return ERR_PTR(-ENOMEM); | ||
7391 | } | ||
7392 | |||
7393 | return &jc->css; | ||
7394 | } | ||
7395 | |||
7396 | static void perf_cgroup_destroy(struct cgroup_subsys *ss, | ||
7397 | struct cgroup *cont) | ||
7398 | { | ||
7399 | struct perf_cgroup *jc; | ||
7400 | jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), | ||
7401 | struct perf_cgroup, css); | ||
7402 | free_percpu(jc->info); | ||
7403 | kfree(jc); | ||
7404 | } | ||
7405 | |||
7406 | static int __perf_cgroup_move(void *info) | ||
7407 | { | ||
7408 | struct task_struct *task = info; | ||
7409 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); | ||
7410 | return 0; | ||
7411 | } | ||
7412 | |||
7413 | static void perf_cgroup_move(struct task_struct *task) | ||
7414 | { | ||
7415 | task_function_call(task, __perf_cgroup_move, task); | ||
7416 | } | ||
7417 | |||
7418 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
7419 | struct cgroup *old_cgrp, struct task_struct *task, | ||
7420 | bool threadgroup) | ||
7421 | { | ||
7422 | perf_cgroup_move(task); | ||
7423 | if (threadgroup) { | ||
7424 | struct task_struct *c; | ||
7425 | rcu_read_lock(); | ||
7426 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
7427 | perf_cgroup_move(c); | ||
7428 | } | ||
7429 | rcu_read_unlock(); | ||
7430 | } | ||
7431 | } | ||
7432 | |||
7433 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
7434 | struct cgroup *old_cgrp, struct task_struct *task) | ||
7435 | { | ||
7436 | /* | ||
7437 | * cgroup_exit() is called in the copy_process() failure path. | ||
7439 | * Ignore this case since the task hasn't run yet; this avoids | ||
7440 | * trying to poke a half-freed task state from generic code. | ||
7440 | */ | ||
7441 | if (!(task->flags & PF_EXITING)) | ||
7442 | return; | ||
7443 | |||
7444 | perf_cgroup_move(task); | ||
7445 | } | ||
7446 | |||
7447 | struct cgroup_subsys perf_subsys = { | ||
7448 | .name = "perf_event", | ||
7449 | .subsys_id = perf_subsys_id, | ||
7450 | .create = perf_cgroup_create, | ||
7451 | .destroy = perf_cgroup_destroy, | ||
7452 | .exit = perf_cgroup_exit, | ||
7453 | .attach = perf_cgroup_attach, | ||
7454 | }; | ||
7455 | #endif /* CONFIG_CGROUP_PERF */ | ||
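
The perf_subsys definition above is what exposes the "perf_event" controller to cgroupfs: perf_cgroup_attach() runs as tasks are moved into a group and perf_cgroup_exit() as they exit. A hedged user-space sketch of setting such a group up (the mount point and group name are assumptions, and /mnt/perf_cgroup must already exist):

	#include <sys/mount.h>
	#include <sys/stat.h>

	static int setup_perf_cgroup(void)
	{
		/* mount the perf_event controller (cgroup v1 style) */
		if (mount("cgroup", "/mnt/perf_cgroup", "cgroup", 0, "perf_event"))
			return -1;
		/* create a group; writing pids into its "tasks" file moves tasks in,
		 * which ends up invoking perf_cgroup_attach() above */
		return mkdir("/mnt/perf_cgroup/mygrp", 0755);
	}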