Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 1014
1 files changed, 687 insertions, 327 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 0f86feb6db0c..e73e53c7582f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
22 | #include <linux/vmstat.h> | 22 | #include <linux/vmstat.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
24 | #include <linux/rculist.h> | 25 | #include <linux/rculist.h> |
25 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
@@ -27,13 +28,15 @@ | |||
27 | #include <linux/anon_inodes.h> | 28 | #include <linux/anon_inodes.h> |
28 | #include <linux/kernel_stat.h> | 29 | #include <linux/kernel_stat.h> |
29 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | #include <linux/ftrace_event.h> | ||
32 | #include <linux/hw_breakpoint.h> | ||
30 | 33 | ||
31 | #include <asm/irq_regs.h> | 34 | #include <asm/irq_regs.h> |
32 | 35 | ||
33 | /* | 36 | /* |
34 | * Each CPU has a list of per CPU events: | 37 | * Each CPU has a list of per CPU events: |
35 | */ | 38 | */ |
36 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | 39 | static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); |
37 | 40 | ||
38 | int perf_max_events __read_mostly = 1; | 41 | int perf_max_events __read_mostly = 1; |
39 | static int perf_reserved_percpu __read_mostly; | 42 | static int perf_reserved_percpu __read_mostly; |
@@ -243,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
243 | put_ctx(ctx); | 246 | put_ctx(ctx); |
244 | } | 247 | } |
245 | 248 | ||
249 | static inline u64 perf_clock(void) | ||
250 | { | ||
251 | return cpu_clock(smp_processor_id()); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Update the record of the current time in a context. | ||
256 | */ | ||
257 | static void update_context_time(struct perf_event_context *ctx) | ||
258 | { | ||
259 | u64 now = perf_clock(); | ||
260 | |||
261 | ctx->time += now - ctx->timestamp; | ||
262 | ctx->timestamp = now; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Update the total_time_enabled and total_time_running fields for a event. | ||
267 | */ | ||
268 | static void update_event_times(struct perf_event *event) | ||
269 | { | ||
270 | struct perf_event_context *ctx = event->ctx; | ||
271 | u64 run_end; | ||
272 | |||
273 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
274 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
275 | return; | ||
276 | |||
277 | if (ctx->is_active) | ||
278 | run_end = ctx->time; | ||
279 | else | ||
280 | run_end = event->tstamp_stopped; | ||
281 | |||
282 | event->total_time_enabled = run_end - event->tstamp_enabled; | ||
283 | |||
284 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
285 | run_end = event->tstamp_stopped; | ||
286 | else | ||
287 | run_end = ctx->time; | ||
288 | |||
289 | event->total_time_running = run_end - event->tstamp_running; | ||
290 | } | ||
291 | |||
246 | /* | 292 | /* |
247 | * Add a event from the lists for its context. | 293 | * Add a event from the lists for its context. |
248 | * Must be called with ctx->mutex and ctx->lock held. | 294 | * Must be called with ctx->mutex and ctx->lock held. |
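The update_event_times() helper consolidated in the hunk above maintains the total_time_enabled/total_time_running pair that user space later reads back via PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING. The usual consumer-side use of those two values is to extrapolate counts for events that were time-multiplexed on the PMU; a minimal user-space sketch, not part of this patch and with an illustrative function name:

#include <stdint.h>

/* Hypothetical helper: extrapolate a count for an event that was only
 * scheduled on the PMU for part of the time it was enabled.  With
 * enabled = 10 ms and running = 5 ms, a raw count of 1,000,000 scales
 * to roughly 2,000,000. */
static uint64_t perf_scale_count(uint64_t count, uint64_t enabled, uint64_t running)
{
	if (!running)
		return 0;	/* event never ran; nothing to extrapolate */
	return (uint64_t)((double)count * enabled / running);
}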
@@ -291,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
291 | if (event->group_leader != event) | 337 | if (event->group_leader != event) |
292 | event->group_leader->nr_siblings--; | 338 | event->group_leader->nr_siblings--; |
293 | 339 | ||
340 | update_event_times(event); | ||
341 | |||
342 | /* | ||
343 | * If event was in error state, then keep it | ||
344 | * that way, otherwise bogus counts will be | ||
345 | * returned on read(). The only way to get out | ||
346 | * of error state is by explicit re-enabling | ||
347 | * of the event | ||
348 | */ | ||
349 | if (event->state > PERF_EVENT_STATE_OFF) | ||
350 | event->state = PERF_EVENT_STATE_OFF; | ||
351 | |||
294 | /* | 352 | /* |
295 | * If this was a group event with sibling events then | 353 | * If this was a group event with sibling events then |
296 | * upgrade the siblings to singleton events by adding them | 354 | * upgrade the siblings to singleton events by adding them |
@@ -418,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event) | |||
418 | if (!task) { | 476 | if (!task) { |
419 | /* | 477 | /* |
420 | * Per cpu events are removed via an smp call and | 478 | * Per cpu events are removed via an smp call and |
421 | * the removal is always sucessful. | 479 | * the removal is always successful. |
422 | */ | 480 | */ |
423 | smp_call_function_single(event->cpu, | 481 | smp_call_function_single(event->cpu, |
424 | __perf_event_remove_from_context, | 482 | __perf_event_remove_from_context, |
@@ -444,50 +502,11 @@ retry: | |||
444 | * can remove the event safely, if the call above did not | 502 | * can remove the event safely, if the call above did not |
445 | * succeed. | 503 | * succeed. |
446 | */ | 504 | */ |
447 | if (!list_empty(&event->group_entry)) { | 505 | if (!list_empty(&event->group_entry)) |
448 | list_del_event(event, ctx); | 506 | list_del_event(event, ctx); |
449 | } | ||
450 | spin_unlock_irq(&ctx->lock); | 507 | spin_unlock_irq(&ctx->lock); |
451 | } | 508 | } |
452 | 509 | ||
453 | static inline u64 perf_clock(void) | ||
454 | { | ||
455 | return cpu_clock(smp_processor_id()); | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * Update the record of the current time in a context. | ||
460 | */ | ||
461 | static void update_context_time(struct perf_event_context *ctx) | ||
462 | { | ||
463 | u64 now = perf_clock(); | ||
464 | |||
465 | ctx->time += now - ctx->timestamp; | ||
466 | ctx->timestamp = now; | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * Update the total_time_enabled and total_time_running fields for a event. | ||
471 | */ | ||
472 | static void update_event_times(struct perf_event *event) | ||
473 | { | ||
474 | struct perf_event_context *ctx = event->ctx; | ||
475 | u64 run_end; | ||
476 | |||
477 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
478 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
479 | return; | ||
480 | |||
481 | event->total_time_enabled = ctx->time - event->tstamp_enabled; | ||
482 | |||
483 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
484 | run_end = event->tstamp_stopped; | ||
485 | else | ||
486 | run_end = ctx->time; | ||
487 | |||
488 | event->total_time_running = run_end - event->tstamp_running; | ||
489 | } | ||
490 | |||
491 | /* | 510 | /* |
492 | * Update total_time_enabled and total_time_running for all events in a group. | 511 | * Update total_time_enabled and total_time_running for all events in a group. |
493 | */ | 512 | */ |
@@ -548,7 +567,7 @@ static void __perf_event_disable(void *info) | |||
548 | * is the current context on this CPU and preemption is disabled, | 567 | * is the current context on this CPU and preemption is disabled, |
549 | * hence we can't get into perf_event_task_sched_out for this context. | 568 | * hence we can't get into perf_event_task_sched_out for this context. |
550 | */ | 569 | */ |
551 | static void perf_event_disable(struct perf_event *event) | 570 | void perf_event_disable(struct perf_event *event) |
552 | { | 571 | { |
553 | struct perf_event_context *ctx = event->ctx; | 572 | struct perf_event_context *ctx = event->ctx; |
554 | struct task_struct *task = ctx->task; | 573 | struct task_struct *task = ctx->task; |
@@ -826,7 +845,7 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
826 | if (!task) { | 845 | if (!task) { |
827 | /* | 846 | /* |
828 | * Per cpu events are installed via an smp call and | 847 | * Per cpu events are installed via an smp call and |
829 | * the install is always sucessful. | 848 | * the install is always successful. |
830 | */ | 849 | */ |
831 | smp_call_function_single(cpu, __perf_install_in_context, | 850 | smp_call_function_single(cpu, __perf_install_in_context, |
832 | event, 1); | 851 | event, 1); |
@@ -952,7 +971,7 @@ static void __perf_event_enable(void *info) | |||
952 | * perf_event_for_each_child or perf_event_for_each as described | 971 | * perf_event_for_each_child or perf_event_for_each as described |
953 | * for perf_event_disable. | 972 | * for perf_event_disable. |
954 | */ | 973 | */ |
955 | static void perf_event_enable(struct perf_event *event) | 974 | void perf_event_enable(struct perf_event *event) |
956 | { | 975 | { |
957 | struct perf_event_context *ctx = event->ctx; | 976 | struct perf_event_context *ctx = event->ctx; |
958 | struct task_struct *task = ctx->task; | 977 | struct task_struct *task = ctx->task; |
@@ -1031,12 +1050,8 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1031 | 1050 | ||
1032 | perf_disable(); | 1051 | perf_disable(); |
1033 | if (ctx->nr_active) { | 1052 | if (ctx->nr_active) { |
1034 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1053 | list_for_each_entry(event, &ctx->group_list, group_entry) |
1035 | if (event != event->group_leader) | 1054 | group_sched_out(event, cpuctx, ctx); |
1036 | event_sched_out(event, cpuctx, ctx); | ||
1037 | else | ||
1038 | group_sched_out(event, cpuctx, ctx); | ||
1039 | } | ||
1040 | } | 1055 | } |
1041 | perf_enable(); | 1056 | perf_enable(); |
1042 | out: | 1057 | out: |
@@ -1062,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1, | |||
1062 | && !ctx1->pin_count && !ctx2->pin_count; | 1077 | && !ctx1->pin_count && !ctx2->pin_count; |
1063 | } | 1078 | } |
1064 | 1079 | ||
1065 | static void __perf_event_read(void *event); | ||
1066 | |||
1067 | static void __perf_event_sync_stat(struct perf_event *event, | 1080 | static void __perf_event_sync_stat(struct perf_event *event, |
1068 | struct perf_event *next_event) | 1081 | struct perf_event *next_event) |
1069 | { | 1082 | { |
@@ -1081,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event, | |||
1081 | */ | 1094 | */ |
1082 | switch (event->state) { | 1095 | switch (event->state) { |
1083 | case PERF_EVENT_STATE_ACTIVE: | 1096 | case PERF_EVENT_STATE_ACTIVE: |
1084 | __perf_event_read(event); | 1097 | event->pmu->read(event); |
1085 | break; | 1098 | /* fall-through */ |
1086 | 1099 | ||
1087 | case PERF_EVENT_STATE_INACTIVE: | 1100 | case PERF_EVENT_STATE_INACTIVE: |
1088 | update_event_times(event); | 1101 | update_event_times(event); |
@@ -1121,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1121 | if (!ctx->nr_stat) | 1134 | if (!ctx->nr_stat) |
1122 | return; | 1135 | return; |
1123 | 1136 | ||
1137 | update_context_time(ctx); | ||
1138 | |||
1124 | event = list_first_entry(&ctx->event_list, | 1139 | event = list_first_entry(&ctx->event_list, |
1125 | struct perf_event, event_entry); | 1140 | struct perf_event, event_entry); |
1126 | 1141 | ||
@@ -1164,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1164 | if (likely(!ctx || !cpuctx->task_ctx)) | 1179 | if (likely(!ctx || !cpuctx->task_ctx)) |
1165 | return; | 1180 | return; |
1166 | 1181 | ||
1167 | update_context_time(ctx); | ||
1168 | |||
1169 | rcu_read_lock(); | 1182 | rcu_read_lock(); |
1170 | parent = rcu_dereference(ctx->parent_ctx); | 1183 | parent = rcu_dereference(ctx->parent_ctx); |
1171 | next_ctx = next->perf_event_ctxp; | 1184 | next_ctx = next->perf_event_ctxp; |
@@ -1258,12 +1271,8 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1258 | if (event->cpu != -1 && event->cpu != cpu) | 1271 | if (event->cpu != -1 && event->cpu != cpu) |
1259 | continue; | 1272 | continue; |
1260 | 1273 | ||
1261 | if (event != event->group_leader) | 1274 | if (group_can_go_on(event, cpuctx, 1)) |
1262 | event_sched_in(event, cpuctx, ctx, cpu); | 1275 | group_sched_in(event, cpuctx, ctx, cpu); |
1263 | else { | ||
1264 | if (group_can_go_on(event, cpuctx, 1)) | ||
1265 | group_sched_in(event, cpuctx, ctx, cpu); | ||
1266 | } | ||
1267 | 1276 | ||
1268 | /* | 1277 | /* |
1269 | * If this pinned group hasn't been scheduled, | 1278 | * If this pinned group hasn't been scheduled, |
@@ -1291,15 +1300,9 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1291 | if (event->cpu != -1 && event->cpu != cpu) | 1300 | if (event->cpu != -1 && event->cpu != cpu) |
1292 | continue; | 1301 | continue; |
1293 | 1302 | ||
1294 | if (event != event->group_leader) { | 1303 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
1295 | if (event_sched_in(event, cpuctx, ctx, cpu)) | 1304 | if (group_sched_in(event, cpuctx, ctx, cpu)) |
1296 | can_add_hw = 0; | 1305 | can_add_hw = 0; |
1297 | } else { | ||
1298 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | ||
1299 | if (group_sched_in(event, cpuctx, ctx, cpu)) | ||
1300 | can_add_hw = 0; | ||
1301 | } | ||
1302 | } | ||
1303 | } | 1306 | } |
1304 | perf_enable(); | 1307 | perf_enable(); |
1305 | out: | 1308 | out: |
@@ -1368,7 +1371,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1368 | u64 interrupts, freq; | 1371 | u64 interrupts, freq; |
1369 | 1372 | ||
1370 | spin_lock(&ctx->lock); | 1373 | spin_lock(&ctx->lock); |
1371 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1374 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
1372 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1375 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
1373 | continue; | 1376 | continue; |
1374 | 1377 | ||
@@ -1528,7 +1531,6 @@ static void __perf_event_read(void *info) | |||
1528 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1531 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1529 | struct perf_event *event = info; | 1532 | struct perf_event *event = info; |
1530 | struct perf_event_context *ctx = event->ctx; | 1533 | struct perf_event_context *ctx = event->ctx; |
1531 | unsigned long flags; | ||
1532 | 1534 | ||
1533 | /* | 1535 | /* |
1534 | * If this is a task context, we need to check whether it is | 1536 | * If this is a task context, we need to check whether it is |
@@ -1540,12 +1542,12 @@ static void __perf_event_read(void *info) | |||
1540 | if (ctx->task && cpuctx->task_ctx != ctx) | 1542 | if (ctx->task && cpuctx->task_ctx != ctx) |
1541 | return; | 1543 | return; |
1542 | 1544 | ||
1543 | local_irq_save(flags); | 1545 | spin_lock(&ctx->lock); |
1544 | if (ctx->is_active) | 1546 | update_context_time(ctx); |
1545 | update_context_time(ctx); | ||
1546 | event->pmu->read(event); | ||
1547 | update_event_times(event); | 1547 | update_event_times(event); |
1548 | local_irq_restore(flags); | 1548 | spin_unlock(&ctx->lock); |
1549 | |||
1550 | event->pmu->read(event); | ||
1549 | } | 1551 | } |
1550 | 1552 | ||
1551 | static u64 perf_event_read(struct perf_event *event) | 1553 | static u64 perf_event_read(struct perf_event *event) |
@@ -1558,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event) | |||
1558 | smp_call_function_single(event->oncpu, | 1560 | smp_call_function_single(event->oncpu, |
1559 | __perf_event_read, event, 1); | 1561 | __perf_event_read, event, 1); |
1560 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { | 1562 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { |
1563 | struct perf_event_context *ctx = event->ctx; | ||
1564 | unsigned long flags; | ||
1565 | |||
1566 | spin_lock_irqsave(&ctx->lock, flags); | ||
1567 | update_context_time(ctx); | ||
1561 | update_event_times(event); | 1568 | update_event_times(event); |
1569 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
1562 | } | 1570 | } |
1563 | 1571 | ||
1564 | return atomic64_read(&event->count); | 1572 | return atomic64_read(&event->count); |
@@ -1571,7 +1579,6 @@ static void | |||
1571 | __perf_event_init_context(struct perf_event_context *ctx, | 1579 | __perf_event_init_context(struct perf_event_context *ctx, |
1572 | struct task_struct *task) | 1580 | struct task_struct *task) |
1573 | { | 1581 | { |
1574 | memset(ctx, 0, sizeof(*ctx)); | ||
1575 | spin_lock_init(&ctx->lock); | 1582 | spin_lock_init(&ctx->lock); |
1576 | mutex_init(&ctx->mutex); | 1583 | mutex_init(&ctx->mutex); |
1577 | INIT_LIST_HEAD(&ctx->group_list); | 1584 | INIT_LIST_HEAD(&ctx->group_list); |
@@ -1646,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1646 | } | 1653 | } |
1647 | 1654 | ||
1648 | if (!ctx) { | 1655 | if (!ctx) { |
1649 | ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 1656 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
1650 | err = -ENOMEM; | 1657 | err = -ENOMEM; |
1651 | if (!ctx) | 1658 | if (!ctx) |
1652 | goto errout; | 1659 | goto errout; |
@@ -1671,6 +1678,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1671 | return ERR_PTR(err); | 1678 | return ERR_PTR(err); |
1672 | } | 1679 | } |
1673 | 1680 | ||
1681 | static void perf_event_free_filter(struct perf_event *event); | ||
1682 | |||
1674 | static void free_event_rcu(struct rcu_head *head) | 1683 | static void free_event_rcu(struct rcu_head *head) |
1675 | { | 1684 | { |
1676 | struct perf_event *event; | 1685 | struct perf_event *event; |
@@ -1678,6 +1687,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
1678 | event = container_of(head, struct perf_event, rcu_head); | 1687 | event = container_of(head, struct perf_event, rcu_head); |
1679 | if (event->ns) | 1688 | if (event->ns) |
1680 | put_pid_ns(event->ns); | 1689 | put_pid_ns(event->ns); |
1690 | perf_event_free_filter(event); | ||
1681 | kfree(event); | 1691 | kfree(event); |
1682 | } | 1692 | } |
1683 | 1693 | ||
@@ -1709,16 +1719,10 @@ static void free_event(struct perf_event *event) | |||
1709 | call_rcu(&event->rcu_head, free_event_rcu); | 1719 | call_rcu(&event->rcu_head, free_event_rcu); |
1710 | } | 1720 | } |
1711 | 1721 | ||
1712 | /* | 1722 | int perf_event_release_kernel(struct perf_event *event) |
1713 | * Called when the last reference to the file is gone. | ||
1714 | */ | ||
1715 | static int perf_release(struct inode *inode, struct file *file) | ||
1716 | { | 1723 | { |
1717 | struct perf_event *event = file->private_data; | ||
1718 | struct perf_event_context *ctx = event->ctx; | 1724 | struct perf_event_context *ctx = event->ctx; |
1719 | 1725 | ||
1720 | file->private_data = NULL; | ||
1721 | |||
1722 | WARN_ON_ONCE(ctx->parent_ctx); | 1726 | WARN_ON_ONCE(ctx->parent_ctx); |
1723 | mutex_lock(&ctx->mutex); | 1727 | mutex_lock(&ctx->mutex); |
1724 | perf_event_remove_from_context(event); | 1728 | perf_event_remove_from_context(event); |
@@ -1733,6 +1737,19 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1733 | 1737 | ||
1734 | return 0; | 1738 | return 0; |
1735 | } | 1739 | } |
1740 | EXPORT_SYMBOL_GPL(perf_event_release_kernel); | ||
1741 | |||
1742 | /* | ||
1743 | * Called when the last reference to the file is gone. | ||
1744 | */ | ||
1745 | static int perf_release(struct inode *inode, struct file *file) | ||
1746 | { | ||
1747 | struct perf_event *event = file->private_data; | ||
1748 | |||
1749 | file->private_data = NULL; | ||
1750 | |||
1751 | return perf_event_release_kernel(event); | ||
1752 | } | ||
1736 | 1753 | ||
1737 | static int perf_event_read_size(struct perf_event *event) | 1754 | static int perf_event_read_size(struct perf_event *event) |
1738 | { | 1755 | { |
@@ -1759,91 +1776,94 @@ static int perf_event_read_size(struct perf_event *event) | |||
1759 | return size; | 1776 | return size; |
1760 | } | 1777 | } |
1761 | 1778 | ||
1762 | static u64 perf_event_read_value(struct perf_event *event) | 1779 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
1763 | { | 1780 | { |
1764 | struct perf_event *child; | 1781 | struct perf_event *child; |
1765 | u64 total = 0; | 1782 | u64 total = 0; |
1766 | 1783 | ||
1784 | *enabled = 0; | ||
1785 | *running = 0; | ||
1786 | |||
1787 | mutex_lock(&event->child_mutex); | ||
1767 | total += perf_event_read(event); | 1788 | total += perf_event_read(event); |
1768 | list_for_each_entry(child, &event->child_list, child_list) | 1789 | *enabled += event->total_time_enabled + |
1790 | atomic64_read(&event->child_total_time_enabled); | ||
1791 | *running += event->total_time_running + | ||
1792 | atomic64_read(&event->child_total_time_running); | ||
1793 | |||
1794 | list_for_each_entry(child, &event->child_list, child_list) { | ||
1769 | total += perf_event_read(child); | 1795 | total += perf_event_read(child); |
1796 | *enabled += child->total_time_enabled; | ||
1797 | *running += child->total_time_running; | ||
1798 | } | ||
1799 | mutex_unlock(&event->child_mutex); | ||
1770 | 1800 | ||
1771 | return total; | 1801 | return total; |
1772 | } | 1802 | } |
1773 | 1803 | EXPORT_SYMBOL_GPL(perf_event_read_value); | |
1774 | static int perf_event_read_entry(struct perf_event *event, | ||
1775 | u64 read_format, char __user *buf) | ||
1776 | { | ||
1777 | int n = 0, count = 0; | ||
1778 | u64 values[2]; | ||
1779 | |||
1780 | values[n++] = perf_event_read_value(event); | ||
1781 | if (read_format & PERF_FORMAT_ID) | ||
1782 | values[n++] = primary_event_id(event); | ||
1783 | |||
1784 | count = n * sizeof(u64); | ||
1785 | |||
1786 | if (copy_to_user(buf, values, count)) | ||
1787 | return -EFAULT; | ||
1788 | |||
1789 | return count; | ||
1790 | } | ||
1791 | 1804 | ||
1792 | static int perf_event_read_group(struct perf_event *event, | 1805 | static int perf_event_read_group(struct perf_event *event, |
1793 | u64 read_format, char __user *buf) | 1806 | u64 read_format, char __user *buf) |
1794 | { | 1807 | { |
1795 | struct perf_event *leader = event->group_leader, *sub; | 1808 | struct perf_event *leader = event->group_leader, *sub; |
1796 | int n = 0, size = 0, err = -EFAULT; | 1809 | int n = 0, size = 0, ret = -EFAULT; |
1797 | u64 values[3]; | 1810 | struct perf_event_context *ctx = leader->ctx; |
1811 | u64 values[5]; | ||
1812 | u64 count, enabled, running; | ||
1813 | |||
1814 | mutex_lock(&ctx->mutex); | ||
1815 | count = perf_event_read_value(leader, &enabled, &running); | ||
1798 | 1816 | ||
1799 | values[n++] = 1 + leader->nr_siblings; | 1817 | values[n++] = 1 + leader->nr_siblings; |
1800 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1818 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1801 | values[n++] = leader->total_time_enabled + | 1819 | values[n++] = enabled; |
1802 | atomic64_read(&leader->child_total_time_enabled); | 1820 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1803 | } | 1821 | values[n++] = running; |
1804 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 1822 | values[n++] = count; |
1805 | values[n++] = leader->total_time_running + | 1823 | if (read_format & PERF_FORMAT_ID) |
1806 | atomic64_read(&leader->child_total_time_running); | 1824 | values[n++] = primary_event_id(leader); |
1807 | } | ||
1808 | 1825 | ||
1809 | size = n * sizeof(u64); | 1826 | size = n * sizeof(u64); |
1810 | 1827 | ||
1811 | if (copy_to_user(buf, values, size)) | 1828 | if (copy_to_user(buf, values, size)) |
1812 | return -EFAULT; | 1829 | goto unlock; |
1813 | |||
1814 | err = perf_event_read_entry(leader, read_format, buf + size); | ||
1815 | if (err < 0) | ||
1816 | return err; | ||
1817 | 1830 | ||
1818 | size += err; | 1831 | ret = size; |
1819 | 1832 | ||
1820 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 1833 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
1821 | err = perf_event_read_entry(sub, read_format, | 1834 | n = 0; |
1822 | buf + size); | 1835 | |
1823 | if (err < 0) | 1836 | values[n++] = perf_event_read_value(sub, &enabled, &running); |
1824 | return err; | 1837 | if (read_format & PERF_FORMAT_ID) |
1838 | values[n++] = primary_event_id(sub); | ||
1839 | |||
1840 | size = n * sizeof(u64); | ||
1841 | |||
1842 | if (copy_to_user(buf + ret, values, size)) { | ||
1843 | ret = -EFAULT; | ||
1844 | goto unlock; | ||
1845 | } | ||
1825 | 1846 | ||
1826 | size += err; | 1847 | ret += size; |
1827 | } | 1848 | } |
1849 | unlock: | ||
1850 | mutex_unlock(&ctx->mutex); | ||
1828 | 1851 | ||
1829 | return size; | 1852 | return ret; |
1830 | } | 1853 | } |
1831 | 1854 | ||
1832 | static int perf_event_read_one(struct perf_event *event, | 1855 | static int perf_event_read_one(struct perf_event *event, |
1833 | u64 read_format, char __user *buf) | 1856 | u64 read_format, char __user *buf) |
1834 | { | 1857 | { |
1858 | u64 enabled, running; | ||
1835 | u64 values[4]; | 1859 | u64 values[4]; |
1836 | int n = 0; | 1860 | int n = 0; |
1837 | 1861 | ||
1838 | values[n++] = perf_event_read_value(event); | 1862 | values[n++] = perf_event_read_value(event, &enabled, &running); |
1839 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1863 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1840 | values[n++] = event->total_time_enabled + | 1864 | values[n++] = enabled; |
1841 | atomic64_read(&event->child_total_time_enabled); | 1865 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1842 | } | 1866 | values[n++] = running; |
1843 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | ||
1844 | values[n++] = event->total_time_running + | ||
1845 | atomic64_read(&event->child_total_time_running); | ||
1846 | } | ||
1847 | if (read_format & PERF_FORMAT_ID) | 1867 | if (read_format & PERF_FORMAT_ID) |
1848 | values[n++] = primary_event_id(event); | 1868 | values[n++] = primary_event_id(event); |
1849 | 1869 | ||
@@ -1874,12 +1894,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
1874 | return -ENOSPC; | 1894 | return -ENOSPC; |
1875 | 1895 | ||
1876 | WARN_ON_ONCE(event->ctx->parent_ctx); | 1896 | WARN_ON_ONCE(event->ctx->parent_ctx); |
1877 | mutex_lock(&event->child_mutex); | ||
1878 | if (read_format & PERF_FORMAT_GROUP) | 1897 | if (read_format & PERF_FORMAT_GROUP) |
1879 | ret = perf_event_read_group(event, read_format, buf); | 1898 | ret = perf_event_read_group(event, read_format, buf); |
1880 | else | 1899 | else |
1881 | ret = perf_event_read_one(event, read_format, buf); | 1900 | ret = perf_event_read_one(event, read_format, buf); |
1882 | mutex_unlock(&event->child_mutex); | ||
1883 | 1901 | ||
1884 | return ret; | 1902 | return ret; |
1885 | } | 1903 | } |
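For reference, the values[] layout that the reworked perf_event_read_group() copies out is exactly what a user-space reader of a PERF_FORMAT_GROUP fd sees: nr, then the optional enabled/running times, then one value (plus optional id) per group member, leader first. A rough parsing sketch, assuming PERF_FORMAT_TOTAL_TIME_ENABLED, PERF_FORMAT_TOTAL_TIME_RUNNING and PERF_FORMAT_ID were all requested; the struct and function names below are illustrative, not kernel API:

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

struct group_read {
	uint64_t nr;		/* 1 + number of siblings */
	uint64_t time_enabled;	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
	uint64_t time_running;	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
	struct {
		uint64_t value;
		uint64_t id;	/* PERF_FORMAT_ID */
	} cnt[];		/* leader first, then each sibling */
};

static int dump_group(int group_fd)
{
	uint64_t buf[512];
	struct group_read *gr = (struct group_read *)buf;
	uint64_t i;

	if (read(group_fd, buf, sizeof(buf)) < 0)
		return -1;

	for (i = 0; i < gr->nr; i++)
		printf("id %llu: value %llu (enabled %llu ns, running %llu ns)\n",
		       (unsigned long long)gr->cnt[i].id,
		       (unsigned long long)gr->cnt[i].value,
		       (unsigned long long)gr->time_enabled,
		       (unsigned long long)gr->time_running);
	return 0;
}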
@@ -1987,7 +2005,8 @@ unlock: | |||
1987 | return ret; | 2005 | return ret; |
1988 | } | 2006 | } |
1989 | 2007 | ||
1990 | int perf_event_set_output(struct perf_event *event, int output_fd); | 2008 | static int perf_event_set_output(struct perf_event *event, int output_fd); |
2009 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | ||
1991 | 2010 | ||
1992 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 2011 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
1993 | { | 2012 | { |
@@ -2015,6 +2034,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
2015 | case PERF_EVENT_IOC_SET_OUTPUT: | 2034 | case PERF_EVENT_IOC_SET_OUTPUT: |
2016 | return perf_event_set_output(event, arg); | 2035 | return perf_event_set_output(event, arg); |
2017 | 2036 | ||
2037 | case PERF_EVENT_IOC_SET_FILTER: | ||
2038 | return perf_event_set_filter(event, (void __user *)arg); | ||
2039 | |||
2018 | default: | 2040 | default: |
2019 | return -ENOTTY; | 2041 | return -ENOTTY; |
2020 | } | 2042 | } |
@@ -2105,49 +2127,31 @@ unlock: | |||
2105 | rcu_read_unlock(); | 2127 | rcu_read_unlock(); |
2106 | } | 2128 | } |
2107 | 2129 | ||
2108 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 2130 | static unsigned long perf_data_size(struct perf_mmap_data *data) |
2109 | { | 2131 | { |
2110 | struct perf_event *event = vma->vm_file->private_data; | 2132 | return data->nr_pages << (PAGE_SHIFT + data->data_order); |
2111 | struct perf_mmap_data *data; | 2133 | } |
2112 | int ret = VM_FAULT_SIGBUS; | ||
2113 | |||
2114 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
2115 | if (vmf->pgoff == 0) | ||
2116 | ret = 0; | ||
2117 | return ret; | ||
2118 | } | ||
2119 | |||
2120 | rcu_read_lock(); | ||
2121 | data = rcu_dereference(event->data); | ||
2122 | if (!data) | ||
2123 | goto unlock; | ||
2124 | |||
2125 | if (vmf->pgoff == 0) { | ||
2126 | vmf->page = virt_to_page(data->user_page); | ||
2127 | } else { | ||
2128 | int nr = vmf->pgoff - 1; | ||
2129 | |||
2130 | if ((unsigned)nr > data->nr_pages) | ||
2131 | goto unlock; | ||
2132 | 2134 | ||
2133 | if (vmf->flags & FAULT_FLAG_WRITE) | 2135 | #ifndef CONFIG_PERF_USE_VMALLOC |
2134 | goto unlock; | ||
2135 | 2136 | ||
2136 | vmf->page = virt_to_page(data->data_pages[nr]); | 2137 | /* |
2137 | } | 2138 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. |
2139 | */ | ||
2138 | 2140 | ||
2139 | get_page(vmf->page); | 2141 | static struct page * |
2140 | vmf->page->mapping = vma->vm_file->f_mapping; | 2142 | perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) |
2141 | vmf->page->index = vmf->pgoff; | 2143 | { |
2144 | if (pgoff > data->nr_pages) | ||
2145 | return NULL; | ||
2142 | 2146 | ||
2143 | ret = 0; | 2147 | if (pgoff == 0) |
2144 | unlock: | 2148 | return virt_to_page(data->user_page); |
2145 | rcu_read_unlock(); | ||
2146 | 2149 | ||
2147 | return ret; | 2150 | return virt_to_page(data->data_pages[pgoff - 1]); |
2148 | } | 2151 | } |
2149 | 2152 | ||
2150 | static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | 2153 | static struct perf_mmap_data * |
2154 | perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | ||
2151 | { | 2155 | { |
2152 | struct perf_mmap_data *data; | 2156 | struct perf_mmap_data *data; |
2153 | unsigned long size; | 2157 | unsigned long size; |
@@ -2172,19 +2176,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | |||
2172 | goto fail_data_pages; | 2176 | goto fail_data_pages; |
2173 | } | 2177 | } |
2174 | 2178 | ||
2179 | data->data_order = 0; | ||
2175 | data->nr_pages = nr_pages; | 2180 | data->nr_pages = nr_pages; |
2176 | atomic_set(&data->lock, -1); | ||
2177 | 2181 | ||
2178 | if (event->attr.watermark) { | 2182 | return data; |
2179 | data->watermark = min_t(long, PAGE_SIZE * nr_pages, | ||
2180 | event->attr.wakeup_watermark); | ||
2181 | } | ||
2182 | if (!data->watermark) | ||
2183 | data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); | ||
2184 | |||
2185 | rcu_assign_pointer(event->data, data); | ||
2186 | |||
2187 | return 0; | ||
2188 | 2183 | ||
2189 | fail_data_pages: | 2184 | fail_data_pages: |
2190 | for (i--; i >= 0; i--) | 2185 | for (i--; i >= 0; i--) |
@@ -2196,7 +2191,7 @@ fail_user_page: | |||
2196 | kfree(data); | 2191 | kfree(data); |
2197 | 2192 | ||
2198 | fail: | 2193 | fail: |
2199 | return -ENOMEM; | 2194 | return NULL; |
2200 | } | 2195 | } |
2201 | 2196 | ||
2202 | static void perf_mmap_free_page(unsigned long addr) | 2197 | static void perf_mmap_free_page(unsigned long addr) |
@@ -2207,28 +2202,170 @@ static void perf_mmap_free_page(unsigned long addr) | |||
2207 | __free_page(page); | 2202 | __free_page(page); |
2208 | } | 2203 | } |
2209 | 2204 | ||
2210 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | 2205 | static void perf_mmap_data_free(struct perf_mmap_data *data) |
2211 | { | 2206 | { |
2212 | struct perf_mmap_data *data; | ||
2213 | int i; | 2207 | int i; |
2214 | 2208 | ||
2215 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
2216 | |||
2217 | perf_mmap_free_page((unsigned long)data->user_page); | 2209 | perf_mmap_free_page((unsigned long)data->user_page); |
2218 | for (i = 0; i < data->nr_pages; i++) | 2210 | for (i = 0; i < data->nr_pages; i++) |
2219 | perf_mmap_free_page((unsigned long)data->data_pages[i]); | 2211 | perf_mmap_free_page((unsigned long)data->data_pages[i]); |
2212 | kfree(data); | ||
2213 | } | ||
2214 | |||
2215 | #else | ||
2216 | |||
2217 | /* | ||
2218 | * Back perf_mmap() with vmalloc memory. | ||
2219 | * | ||
2220 | * Required for architectures that have d-cache aliasing issues. | ||
2221 | */ | ||
2222 | |||
2223 | static struct page * | ||
2224 | perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) | ||
2225 | { | ||
2226 | if (pgoff > (1UL << data->data_order)) | ||
2227 | return NULL; | ||
2228 | |||
2229 | return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); | ||
2230 | } | ||
2231 | |||
2232 | static void perf_mmap_unmark_page(void *addr) | ||
2233 | { | ||
2234 | struct page *page = vmalloc_to_page(addr); | ||
2235 | |||
2236 | page->mapping = NULL; | ||
2237 | } | ||
2238 | |||
2239 | static void perf_mmap_data_free_work(struct work_struct *work) | ||
2240 | { | ||
2241 | struct perf_mmap_data *data; | ||
2242 | void *base; | ||
2243 | int i, nr; | ||
2244 | |||
2245 | data = container_of(work, struct perf_mmap_data, work); | ||
2246 | nr = 1 << data->data_order; | ||
2220 | 2247 | ||
2248 | base = data->user_page; | ||
2249 | for (i = 0; i < nr + 1; i++) | ||
2250 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
2251 | |||
2252 | vfree(base); | ||
2221 | kfree(data); | 2253 | kfree(data); |
2222 | } | 2254 | } |
2223 | 2255 | ||
2224 | static void perf_mmap_data_free(struct perf_event *event) | 2256 | static void perf_mmap_data_free(struct perf_mmap_data *data) |
2257 | { | ||
2258 | schedule_work(&data->work); | ||
2259 | } | ||
2260 | |||
2261 | static struct perf_mmap_data * | ||
2262 | perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | ||
2263 | { | ||
2264 | struct perf_mmap_data *data; | ||
2265 | unsigned long size; | ||
2266 | void *all_buf; | ||
2267 | |||
2268 | WARN_ON(atomic_read(&event->mmap_count)); | ||
2269 | |||
2270 | size = sizeof(struct perf_mmap_data); | ||
2271 | size += sizeof(void *); | ||
2272 | |||
2273 | data = kzalloc(size, GFP_KERNEL); | ||
2274 | if (!data) | ||
2275 | goto fail; | ||
2276 | |||
2277 | INIT_WORK(&data->work, perf_mmap_data_free_work); | ||
2278 | |||
2279 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
2280 | if (!all_buf) | ||
2281 | goto fail_all_buf; | ||
2282 | |||
2283 | data->user_page = all_buf; | ||
2284 | data->data_pages[0] = all_buf + PAGE_SIZE; | ||
2285 | data->data_order = ilog2(nr_pages); | ||
2286 | data->nr_pages = 1; | ||
2287 | |||
2288 | return data; | ||
2289 | |||
2290 | fail_all_buf: | ||
2291 | kfree(data); | ||
2292 | |||
2293 | fail: | ||
2294 | return NULL; | ||
2295 | } | ||
2296 | |||
2297 | #endif | ||
2298 | |||
2299 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
2300 | { | ||
2301 | struct perf_event *event = vma->vm_file->private_data; | ||
2302 | struct perf_mmap_data *data; | ||
2303 | int ret = VM_FAULT_SIGBUS; | ||
2304 | |||
2305 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
2306 | if (vmf->pgoff == 0) | ||
2307 | ret = 0; | ||
2308 | return ret; | ||
2309 | } | ||
2310 | |||
2311 | rcu_read_lock(); | ||
2312 | data = rcu_dereference(event->data); | ||
2313 | if (!data) | ||
2314 | goto unlock; | ||
2315 | |||
2316 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) | ||
2317 | goto unlock; | ||
2318 | |||
2319 | vmf->page = perf_mmap_to_page(data, vmf->pgoff); | ||
2320 | if (!vmf->page) | ||
2321 | goto unlock; | ||
2322 | |||
2323 | get_page(vmf->page); | ||
2324 | vmf->page->mapping = vma->vm_file->f_mapping; | ||
2325 | vmf->page->index = vmf->pgoff; | ||
2326 | |||
2327 | ret = 0; | ||
2328 | unlock: | ||
2329 | rcu_read_unlock(); | ||
2330 | |||
2331 | return ret; | ||
2332 | } | ||
2333 | |||
2334 | static void | ||
2335 | perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) | ||
2336 | { | ||
2337 | long max_size = perf_data_size(data); | ||
2338 | |||
2339 | atomic_set(&data->lock, -1); | ||
2340 | |||
2341 | if (event->attr.watermark) { | ||
2342 | data->watermark = min_t(long, max_size, | ||
2343 | event->attr.wakeup_watermark); | ||
2344 | } | ||
2345 | |||
2346 | if (!data->watermark) | ||
2347 | data->watermark = max_size / 2; | ||
2348 | |||
2349 | |||
2350 | rcu_assign_pointer(event->data, data); | ||
2351 | } | ||
2352 | |||
2353 | static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) | ||
2354 | { | ||
2355 | struct perf_mmap_data *data; | ||
2356 | |||
2357 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
2358 | perf_mmap_data_free(data); | ||
2359 | } | ||
2360 | |||
2361 | static void perf_mmap_data_release(struct perf_event *event) | ||
2225 | { | 2362 | { |
2226 | struct perf_mmap_data *data = event->data; | 2363 | struct perf_mmap_data *data = event->data; |
2227 | 2364 | ||
2228 | WARN_ON(atomic_read(&event->mmap_count)); | 2365 | WARN_ON(atomic_read(&event->mmap_count)); |
2229 | 2366 | ||
2230 | rcu_assign_pointer(event->data, NULL); | 2367 | rcu_assign_pointer(event->data, NULL); |
2231 | call_rcu(&data->rcu_head, __perf_mmap_data_free); | 2368 | call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); |
2232 | } | 2369 | } |
2233 | 2370 | ||
2234 | static void perf_mmap_open(struct vm_area_struct *vma) | 2371 | static void perf_mmap_open(struct vm_area_struct *vma) |
@@ -2244,11 +2381,12 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
2244 | 2381 | ||
2245 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2382 | WARN_ON_ONCE(event->ctx->parent_ctx); |
2246 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 2383 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
2384 | unsigned long size = perf_data_size(event->data); | ||
2247 | struct user_struct *user = current_user(); | 2385 | struct user_struct *user = current_user(); |
2248 | 2386 | ||
2249 | atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm); | 2387 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
2250 | vma->vm_mm->locked_vm -= event->data->nr_locked; | 2388 | vma->vm_mm->locked_vm -= event->data->nr_locked; |
2251 | perf_mmap_data_free(event); | 2389 | perf_mmap_data_release(event); |
2252 | mutex_unlock(&event->mmap_mutex); | 2390 | mutex_unlock(&event->mmap_mutex); |
2253 | } | 2391 | } |
2254 | } | 2392 | } |
@@ -2266,6 +2404,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
2266 | unsigned long user_locked, user_lock_limit; | 2404 | unsigned long user_locked, user_lock_limit; |
2267 | struct user_struct *user = current_user(); | 2405 | struct user_struct *user = current_user(); |
2268 | unsigned long locked, lock_limit; | 2406 | unsigned long locked, lock_limit; |
2407 | struct perf_mmap_data *data; | ||
2269 | unsigned long vma_size; | 2408 | unsigned long vma_size; |
2270 | unsigned long nr_pages; | 2409 | unsigned long nr_pages; |
2271 | long user_extra, extra; | 2410 | long user_extra, extra; |
@@ -2328,10 +2467,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
2328 | } | 2467 | } |
2329 | 2468 | ||
2330 | WARN_ON(event->data); | 2469 | WARN_ON(event->data); |
2331 | ret = perf_mmap_data_alloc(event, nr_pages); | 2470 | |
2332 | if (ret) | 2471 | data = perf_mmap_data_alloc(event, nr_pages); |
2472 | ret = -ENOMEM; | ||
2473 | if (!data) | ||
2333 | goto unlock; | 2474 | goto unlock; |
2334 | 2475 | ||
2476 | ret = 0; | ||
2477 | perf_mmap_data_init(event, data); | ||
2478 | |||
2335 | atomic_set(&event->mmap_count, 1); | 2479 | atomic_set(&event->mmap_count, 1); |
2336 | atomic_long_add(user_extra, &user->locked_vm); | 2480 | atomic_long_add(user_extra, &user->locked_vm); |
2337 | vma->vm_mm->locked_vm += extra; | 2481 | vma->vm_mm->locked_vm += extra; |
@@ -2519,7 +2663,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, | |||
2519 | if (!data->writable) | 2663 | if (!data->writable) |
2520 | return true; | 2664 | return true; |
2521 | 2665 | ||
2522 | mask = (data->nr_pages << PAGE_SHIFT) - 1; | 2666 | mask = perf_data_size(data) - 1; |
2523 | 2667 | ||
2524 | offset = (offset - tail) & mask; | 2668 | offset = (offset - tail) & mask; |
2525 | head = (head - tail) & mask; | 2669 | head = (head - tail) & mask; |
@@ -2558,20 +2702,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) | |||
2558 | static void perf_output_lock(struct perf_output_handle *handle) | 2702 | static void perf_output_lock(struct perf_output_handle *handle) |
2559 | { | 2703 | { |
2560 | struct perf_mmap_data *data = handle->data; | 2704 | struct perf_mmap_data *data = handle->data; |
2561 | int cpu; | 2705 | int cur, cpu = get_cpu(); |
2562 | 2706 | ||
2563 | handle->locked = 0; | 2707 | handle->locked = 0; |
2564 | 2708 | ||
2565 | local_irq_save(handle->flags); | 2709 | for (;;) { |
2566 | cpu = smp_processor_id(); | 2710 | cur = atomic_cmpxchg(&data->lock, -1, cpu); |
2567 | 2711 | if (cur == -1) { | |
2568 | if (in_nmi() && atomic_read(&data->lock) == cpu) | 2712 | handle->locked = 1; |
2569 | return; | 2713 | break; |
2714 | } | ||
2715 | if (cur == cpu) | ||
2716 | break; | ||
2570 | 2717 | ||
2571 | while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) | ||
2572 | cpu_relax(); | 2718 | cpu_relax(); |
2573 | 2719 | } | |
2574 | handle->locked = 1; | ||
2575 | } | 2720 | } |
2576 | 2721 | ||
2577 | static void perf_output_unlock(struct perf_output_handle *handle) | 2722 | static void perf_output_unlock(struct perf_output_handle *handle) |
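The rewritten perf_output_lock() above drops the irq-disable plus special-cased NMI check in favour of a single ownership word: data->lock holds the owning CPU, -1 when free, and a nested acquisition from the owning CPU (for example from an NMI) proceeds without taking ownership, so only the outermost path releases the lock. A stand-alone C11 sketch of that scheme, assuming the caller stays pinned to one CPU; all names below are illustrative:

#include <stdatomic.h>

struct out_lock {
	atomic_int owner;	/* owning CPU id, -1 when free */
};

/* Returns 1 if the caller became the owner (and must release), 0 if this
 * is a nested acquisition on the CPU that already holds the lock. */
static int out_lock_acquire(struct out_lock *l, int cpu)
{
	for (;;) {
		int expected = -1;

		if (atomic_compare_exchange_strong(&l->owner, &expected, cpu))
			return 1;		/* took ownership */
		if (expected == cpu)
			return 0;		/* nested on the owning CPU */
		/* someone else owns it: spin and retry */
	}
}

static void out_lock_release(struct out_lock *l, int owned)
{
	if (owned)
		atomic_store(&l->owner, -1);
}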
@@ -2617,14 +2762,14 @@ again: | |||
2617 | if (atomic_xchg(&data->wakeup, 0)) | 2762 | if (atomic_xchg(&data->wakeup, 0)) |
2618 | perf_output_wakeup(handle); | 2763 | perf_output_wakeup(handle); |
2619 | out: | 2764 | out: |
2620 | local_irq_restore(handle->flags); | 2765 | put_cpu(); |
2621 | } | 2766 | } |
2622 | 2767 | ||
2623 | void perf_output_copy(struct perf_output_handle *handle, | 2768 | void perf_output_copy(struct perf_output_handle *handle, |
2624 | const void *buf, unsigned int len) | 2769 | const void *buf, unsigned int len) |
2625 | { | 2770 | { |
2626 | unsigned int pages_mask; | 2771 | unsigned int pages_mask; |
2627 | unsigned int offset; | 2772 | unsigned long offset; |
2628 | unsigned int size; | 2773 | unsigned int size; |
2629 | void **pages; | 2774 | void **pages; |
2630 | 2775 | ||
@@ -2633,12 +2778,14 @@ void perf_output_copy(struct perf_output_handle *handle, | |||
2633 | pages = handle->data->data_pages; | 2778 | pages = handle->data->data_pages; |
2634 | 2779 | ||
2635 | do { | 2780 | do { |
2636 | unsigned int page_offset; | 2781 | unsigned long page_offset; |
2782 | unsigned long page_size; | ||
2637 | int nr; | 2783 | int nr; |
2638 | 2784 | ||
2639 | nr = (offset >> PAGE_SHIFT) & pages_mask; | 2785 | nr = (offset >> PAGE_SHIFT) & pages_mask; |
2640 | page_offset = offset & (PAGE_SIZE - 1); | 2786 | page_size = 1UL << (handle->data->data_order + PAGE_SHIFT); |
2641 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | 2787 | page_offset = offset & (page_size - 1); |
2788 | size = min_t(unsigned int, page_size - page_offset, len); | ||
2642 | 2789 | ||
2643 | memcpy(pages[nr] + page_offset, buf, size); | 2790 | memcpy(pages[nr] + page_offset, buf, size); |
2644 | 2791 | ||
@@ -3126,15 +3273,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3126 | { | 3273 | { |
3127 | struct perf_event *event; | 3274 | struct perf_event *event; |
3128 | 3275 | ||
3129 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3130 | return; | ||
3131 | |||
3132 | rcu_read_lock(); | ||
3133 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3276 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3134 | if (perf_event_task_match(event)) | 3277 | if (perf_event_task_match(event)) |
3135 | perf_event_task_output(event, task_event); | 3278 | perf_event_task_output(event, task_event); |
3136 | } | 3279 | } |
3137 | rcu_read_unlock(); | ||
3138 | } | 3280 | } |
3139 | 3281 | ||
3140 | static void perf_event_task_event(struct perf_task_event *task_event) | 3282 | static void perf_event_task_event(struct perf_task_event *task_event) |
@@ -3142,11 +3284,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3142 | struct perf_cpu_context *cpuctx; | 3284 | struct perf_cpu_context *cpuctx; |
3143 | struct perf_event_context *ctx = task_event->task_ctx; | 3285 | struct perf_event_context *ctx = task_event->task_ctx; |
3144 | 3286 | ||
3287 | rcu_read_lock(); | ||
3145 | cpuctx = &get_cpu_var(perf_cpu_context); | 3288 | cpuctx = &get_cpu_var(perf_cpu_context); |
3146 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3289 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3147 | put_cpu_var(perf_cpu_context); | 3290 | put_cpu_var(perf_cpu_context); |
3148 | 3291 | ||
3149 | rcu_read_lock(); | ||
3150 | if (!ctx) | 3292 | if (!ctx) |
3151 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); | 3293 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); |
3152 | if (ctx) | 3294 | if (ctx) |
@@ -3238,15 +3380,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, | |||
3238 | { | 3380 | { |
3239 | struct perf_event *event; | 3381 | struct perf_event *event; |
3240 | 3382 | ||
3241 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3242 | return; | ||
3243 | |||
3244 | rcu_read_lock(); | ||
3245 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3383 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3246 | if (perf_event_comm_match(event)) | 3384 | if (perf_event_comm_match(event)) |
3247 | perf_event_comm_output(event, comm_event); | 3385 | perf_event_comm_output(event, comm_event); |
3248 | } | 3386 | } |
3249 | rcu_read_unlock(); | ||
3250 | } | 3387 | } |
3251 | 3388 | ||
3252 | static void perf_event_comm_event(struct perf_comm_event *comm_event) | 3389 | static void perf_event_comm_event(struct perf_comm_event *comm_event) |
@@ -3257,7 +3394,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3257 | char comm[TASK_COMM_LEN]; | 3394 | char comm[TASK_COMM_LEN]; |
3258 | 3395 | ||
3259 | memset(comm, 0, sizeof(comm)); | 3396 | memset(comm, 0, sizeof(comm)); |
3260 | strncpy(comm, comm_event->task->comm, sizeof(comm)); | 3397 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
3261 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | 3398 | size = ALIGN(strlen(comm)+1, sizeof(u64)); |
3262 | 3399 | ||
3263 | comm_event->comm = comm; | 3400 | comm_event->comm = comm; |
@@ -3265,11 +3402,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3265 | 3402 | ||
3266 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3403 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3267 | 3404 | ||
3405 | rcu_read_lock(); | ||
3268 | cpuctx = &get_cpu_var(perf_cpu_context); | 3406 | cpuctx = &get_cpu_var(perf_cpu_context); |
3269 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3407 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3270 | put_cpu_var(perf_cpu_context); | 3408 | put_cpu_var(perf_cpu_context); |
3271 | 3409 | ||
3272 | rcu_read_lock(); | ||
3273 | /* | 3410 | /* |
3274 | * doesn't really matter which of the child contexts the | 3411 | * doesn't really matter which of the child contexts the |
3275 | * events ends up in. | 3412 | * events ends up in. |
@@ -3362,15 +3499,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, | |||
3362 | { | 3499 | { |
3363 | struct perf_event *event; | 3500 | struct perf_event *event; |
3364 | 3501 | ||
3365 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3366 | return; | ||
3367 | |||
3368 | rcu_read_lock(); | ||
3369 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3502 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3370 | if (perf_event_mmap_match(event, mmap_event)) | 3503 | if (perf_event_mmap_match(event, mmap_event)) |
3371 | perf_event_mmap_output(event, mmap_event); | 3504 | perf_event_mmap_output(event, mmap_event); |
3372 | } | 3505 | } |
3373 | rcu_read_unlock(); | ||
3374 | } | 3506 | } |
3375 | 3507 | ||
3376 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | 3508 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) |
@@ -3426,11 +3558,11 @@ got_name: | |||
3426 | 3558 | ||
3427 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 3559 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
3428 | 3560 | ||
3561 | rcu_read_lock(); | ||
3429 | cpuctx = &get_cpu_var(perf_cpu_context); | 3562 | cpuctx = &get_cpu_var(perf_cpu_context); |
3430 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); | 3563 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); |
3431 | put_cpu_var(perf_cpu_context); | 3564 | put_cpu_var(perf_cpu_context); |
3432 | 3565 | ||
3433 | rcu_read_lock(); | ||
3434 | /* | 3566 | /* |
3435 | * doesn't really matter which of the child contexts the | 3567 | * doesn't really matter which of the child contexts the |
3436 | * events ends up in. | 3568 | * events ends up in. |
@@ -3569,7 +3701,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3569 | perf_event_disable(event); | 3701 | perf_event_disable(event); |
3570 | } | 3702 | } |
3571 | 3703 | ||
3572 | perf_event_output(event, nmi, data, regs); | 3704 | if (event->overflow_handler) |
3705 | event->overflow_handler(event, nmi, data, regs); | ||
3706 | else | ||
3707 | perf_event_output(event, nmi, data, regs); | ||
3708 | |||
3573 | return ret; | 3709 | return ret; |
3574 | } | 3710 | } |
3575 | 3711 | ||
@@ -3614,16 +3750,16 @@ again: | |||
3614 | return nr; | 3750 | return nr; |
3615 | } | 3751 | } |
3616 | 3752 | ||
3617 | static void perf_swevent_overflow(struct perf_event *event, | 3753 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, |
3618 | int nmi, struct perf_sample_data *data, | 3754 | int nmi, struct perf_sample_data *data, |
3619 | struct pt_regs *regs) | 3755 | struct pt_regs *regs) |
3620 | { | 3756 | { |
3621 | struct hw_perf_event *hwc = &event->hw; | 3757 | struct hw_perf_event *hwc = &event->hw; |
3622 | int throttle = 0; | 3758 | int throttle = 0; |
3623 | u64 overflow; | ||
3624 | 3759 | ||
3625 | data->period = event->hw.last_period; | 3760 | data->period = event->hw.last_period; |
3626 | overflow = perf_swevent_set_period(event); | 3761 | if (!overflow) |
3762 | overflow = perf_swevent_set_period(event); | ||
3627 | 3763 | ||
3628 | if (hwc->interrupts == MAX_INTERRUPTS) | 3764 | if (hwc->interrupts == MAX_INTERRUPTS) |
3629 | return; | 3765 | return; |
@@ -3656,14 +3792,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
3656 | 3792 | ||
3657 | atomic64_add(nr, &event->count); | 3793 | atomic64_add(nr, &event->count); |
3658 | 3794 | ||
3795 | if (!regs) | ||
3796 | return; | ||
3797 | |||
3659 | if (!hwc->sample_period) | 3798 | if (!hwc->sample_period) |
3660 | return; | 3799 | return; |
3661 | 3800 | ||
3662 | if (!regs) | 3801 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
3802 | return perf_swevent_overflow(event, 1, nmi, data, regs); | ||
3803 | |||
3804 | if (atomic64_add_negative(nr, &hwc->period_left)) | ||
3663 | return; | 3805 | return; |
3664 | 3806 | ||
3665 | if (!atomic64_add_negative(nr, &hwc->period_left)) | 3807 | perf_swevent_overflow(event, 0, nmi, data, regs); |
3666 | perf_swevent_overflow(event, nmi, data, regs); | ||
3667 | } | 3808 | } |
3668 | 3809 | ||
3669 | static int perf_swevent_is_counting(struct perf_event *event) | 3810 | static int perf_swevent_is_counting(struct perf_event *event) |
@@ -3696,25 +3837,44 @@ static int perf_swevent_is_counting(struct perf_event *event) | |||
3696 | return 1; | 3837 | return 1; |
3697 | } | 3838 | } |
3698 | 3839 | ||
3840 | static int perf_tp_event_match(struct perf_event *event, | ||
3841 | struct perf_sample_data *data); | ||
3842 | |||
3843 | static int perf_exclude_event(struct perf_event *event, | ||
3844 | struct pt_regs *regs) | ||
3845 | { | ||
3846 | if (regs) { | ||
3847 | if (event->attr.exclude_user && user_mode(regs)) | ||
3848 | return 1; | ||
3849 | |||
3850 | if (event->attr.exclude_kernel && !user_mode(regs)) | ||
3851 | return 1; | ||
3852 | } | ||
3853 | |||
3854 | return 0; | ||
3855 | } | ||
3856 | |||
3699 | static int perf_swevent_match(struct perf_event *event, | 3857 | static int perf_swevent_match(struct perf_event *event, |
3700 | enum perf_type_id type, | 3858 | enum perf_type_id type, |
3701 | u32 event_id, struct pt_regs *regs) | 3859 | u32 event_id, |
3860 | struct perf_sample_data *data, | ||
3861 | struct pt_regs *regs) | ||
3702 | { | 3862 | { |
3703 | if (!perf_swevent_is_counting(event)) | 3863 | if (!perf_swevent_is_counting(event)) |
3704 | return 0; | 3864 | return 0; |
3705 | 3865 | ||
3706 | if (event->attr.type != type) | 3866 | if (event->attr.type != type) |
3707 | return 0; | 3867 | return 0; |
3868 | |||
3708 | if (event->attr.config != event_id) | 3869 | if (event->attr.config != event_id) |
3709 | return 0; | 3870 | return 0; |
3710 | 3871 | ||
3711 | if (regs) { | 3872 | if (perf_exclude_event(event, regs)) |
3712 | if (event->attr.exclude_user && user_mode(regs)) | 3873 | return 0; |
3713 | return 0; | ||
3714 | 3874 | ||
3715 | if (event->attr.exclude_kernel && !user_mode(regs)) | 3875 | if (event->attr.type == PERF_TYPE_TRACEPOINT && |
3716 | return 0; | 3876 | !perf_tp_event_match(event, data)) |
3717 | } | 3877 | return 0; |
3718 | 3878 | ||
3719 | return 1; | 3879 | return 1; |
3720 | } | 3880 | } |
@@ -3727,49 +3887,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, | |||
3727 | { | 3887 | { |
3728 | struct perf_event *event; | 3888 | struct perf_event *event; |
3729 | 3889 | ||
3730 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3731 | return; | ||
3732 | |||
3733 | rcu_read_lock(); | ||
3734 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3890 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3735 | if (perf_swevent_match(event, type, event_id, regs)) | 3891 | if (perf_swevent_match(event, type, event_id, data, regs)) |
3736 | perf_swevent_add(event, nr, nmi, data, regs); | 3892 | perf_swevent_add(event, nr, nmi, data, regs); |
3737 | } | 3893 | } |
3738 | rcu_read_unlock(); | ||
3739 | } | 3894 | } |
3740 | 3895 | ||
3741 | static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) | 3896 | int perf_swevent_get_recursion_context(void) |
3742 | { | 3897 | { |
3898 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | ||
3899 | int rctx; | ||
3900 | |||
3743 | if (in_nmi()) | 3901 | if (in_nmi()) |
3744 | return &cpuctx->recursion[3]; | 3902 | rctx = 3; |
3903 | else if (in_irq()) | ||
3904 | rctx = 2; | ||
3905 | else if (in_softirq()) | ||
3906 | rctx = 1; | ||
3907 | else | ||
3908 | rctx = 0; | ||
3745 | 3909 | ||
3746 | if (in_irq()) | 3910 | if (cpuctx->recursion[rctx]) { |
3747 | return &cpuctx->recursion[2]; | 3911 | put_cpu_var(perf_cpu_context); |
3912 | return -1; | ||
3913 | } | ||
3748 | 3914 | ||
3749 | if (in_softirq()) | 3915 | cpuctx->recursion[rctx]++; |
3750 | return &cpuctx->recursion[1]; | 3916 | barrier(); |
3751 | 3917 | ||
3752 | return &cpuctx->recursion[0]; | 3918 | return rctx; |
3753 | } | 3919 | } |
3920 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | ||
3921 | |||
3922 | void perf_swevent_put_recursion_context(int rctx) | ||
3923 | { | ||
3924 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
3925 | barrier(); | ||
3926 | cpuctx->recursion[rctx]--; | ||
3927 | put_cpu_var(perf_cpu_context); | ||
3928 | } | ||
3929 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); | ||
3754 | 3930 | ||
3755 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 3931 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
3756 | u64 nr, int nmi, | 3932 | u64 nr, int nmi, |
3757 | struct perf_sample_data *data, | 3933 | struct perf_sample_data *data, |
3758 | struct pt_regs *regs) | 3934 | struct pt_regs *regs) |
3759 | { | 3935 | { |
3760 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 3936 | struct perf_cpu_context *cpuctx; |
3761 | int *recursion = perf_swevent_recursion_context(cpuctx); | ||
3762 | struct perf_event_context *ctx; | 3937 | struct perf_event_context *ctx; |
3763 | 3938 | ||
3764 | if (*recursion) | 3939 | cpuctx = &__get_cpu_var(perf_cpu_context); |
3765 | goto out; | 3940 | rcu_read_lock(); |
3766 | |||
3767 | (*recursion)++; | ||
3768 | barrier(); | ||
3769 | |||
3770 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, | 3941 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, |
3771 | nr, nmi, data, regs); | 3942 | nr, nmi, data, regs); |
3772 | rcu_read_lock(); | ||
3773 | /* | 3943 | /* |
3774 | * doesn't really matter which of the child contexts the | 3944 | * doesn't really matter which of the child contexts the |
3775 | * events ends up in. | 3945 | * events ends up in. |
@@ -3778,23 +3948,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
3778 | if (ctx) | 3948 | if (ctx) |
3779 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); | 3949 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); |
3780 | rcu_read_unlock(); | 3950 | rcu_read_unlock(); |
3781 | |||
3782 | barrier(); | ||
3783 | (*recursion)--; | ||
3784 | |||
3785 | out: | ||
3786 | put_cpu_var(perf_cpu_context); | ||
3787 | } | 3951 | } |
3788 | 3952 | ||
3789 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 3953 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
3790 | struct pt_regs *regs, u64 addr) | 3954 | struct pt_regs *regs, u64 addr) |
3791 | { | 3955 | { |
3792 | struct perf_sample_data data = { | 3956 | struct perf_sample_data data; |
3793 | .addr = addr, | 3957 | int rctx; |
3794 | }; | ||
3795 | 3958 | ||
3796 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, | 3959 | rctx = perf_swevent_get_recursion_context(); |
3797 | &data, regs); | 3960 | if (rctx < 0) |
3961 | return; | ||
3962 | |||
3963 | data.addr = addr; | ||
3964 | data.raw = NULL; | ||
3965 | |||
3966 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | ||
3967 | |||
3968 | perf_swevent_put_recursion_context(rctx); | ||
3798 | } | 3969 | } |
3799 | 3970 | ||
3800 | static void perf_swevent_read(struct perf_event *event) | 3971 | static void perf_swevent_read(struct perf_event *event) |
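The recursion guard introduced above replaces the old static perf_swevent_recursion_context() pointer scheme: a caller now takes an index for its current context (task, softirq, hardirq or NMI), bails out if that slot is already held, and releases it when done; the pair is exported so code outside this file can protect itself the same way. A minimal sketch of the caller pattern, mirroring __perf_sw_event() above — the function name and the PERF_COUNT_SW_CPU_CLOCK id are illustrative only, and the guard must be released from the same context that took it (get_cpu_var()/put_cpu_var() disable and re-enable preemption around it):

    /* Sketch only: the get/put recursion pattern used by __perf_sw_event(). */
    static void example_emit_swevent(u64 nr, struct pt_regs *regs, u64 addr)
    {
            struct perf_sample_data data;
            int rctx;

            rctx = perf_swevent_get_recursion_context();
            if (rctx < 0)
                    return;         /* this context is already nested */

            data.addr = addr;
            data.raw  = NULL;

            /* nmi=0: not called from NMI context in this sketch */
            do_perf_sw_event(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK,
                             nr, 0, &data, regs);

            perf_swevent_put_recursion_context(rctx);
    }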
@@ -3839,6 +4010,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3839 | event->pmu->read(event); | 4010 | event->pmu->read(event); |
3840 | 4011 | ||
3841 | data.addr = 0; | 4012 | data.addr = 0; |
4013 | data.raw = NULL; | ||
4014 | data.period = event->hw.last_period; | ||
3842 | regs = get_irq_regs(); | 4015 | regs = get_irq_regs(); |
3843 | /* | 4016 | /* |
3844 | * In case we exclude kernel IPs or are somehow not in interrupt | 4017 | * In case we exclude kernel IPs or are somehow not in interrupt |
@@ -3849,8 +4022,9 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3849 | regs = task_pt_regs(current); | 4022 | regs = task_pt_regs(current); |
3850 | 4023 | ||
3851 | if (regs) { | 4024 | if (regs) { |
3852 | if (perf_event_overflow(event, 0, &data, regs)) | 4025 | if (!(event->attr.exclude_idle && current->pid == 0)) |
3853 | ret = HRTIMER_NORESTART; | 4026 | if (perf_event_overflow(event, 0, &data, regs)) |
4027 | ret = HRTIMER_NORESTART; | ||
3854 | } | 4028 | } |
3855 | 4029 | ||
3856 | period = max_t(u64, 10000, event->hw.sample_period); | 4030 | period = max_t(u64, 10000, event->hw.sample_period); |
@@ -3859,6 +4033,42 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3859 | return ret; | 4033 | return ret; |
3860 | } | 4034 | } |
3861 | 4035 | ||
4036 | static void perf_swevent_start_hrtimer(struct perf_event *event) | ||
4037 | { | ||
4038 | struct hw_perf_event *hwc = &event->hw; | ||
4039 | |||
4040 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4041 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4042 | if (hwc->sample_period) { | ||
4043 | u64 period; | ||
4044 | |||
4045 | if (hwc->remaining) { | ||
4046 | if (hwc->remaining < 0) | ||
4047 | period = 10000; | ||
4048 | else | ||
4049 | period = hwc->remaining; | ||
4050 | hwc->remaining = 0; | ||
4051 | } else { | ||
4052 | period = max_t(u64, 10000, hwc->sample_period); | ||
4053 | } | ||
4054 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4055 | ns_to_ktime(period), 0, | ||
4056 | HRTIMER_MODE_REL, 0); | ||
4057 | } | ||
4058 | } | ||
4059 | |||
4060 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | ||
4061 | { | ||
4062 | struct hw_perf_event *hwc = &event->hw; | ||
4063 | |||
4064 | if (hwc->sample_period) { | ||
4065 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4066 | hwc->remaining = ktime_to_ns(remaining); | ||
4067 | |||
4068 | hrtimer_cancel(&hwc->hrtimer); | ||
4069 | } | ||
4070 | } | ||
4071 | |||
3862 | /* | 4072 | /* |
3863 | * Software event: cpu wall time clock | 4073 | * Software event: cpu wall time clock |
3864 | */ | 4074 | */ |
@@ -3870,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event) | |||
3870 | u64 now; | 4080 | u64 now; |
3871 | 4081 | ||
3872 | now = cpu_clock(cpu); | 4082 | now = cpu_clock(cpu); |
3873 | prev = atomic64_read(&event->hw.prev_count); | 4083 | prev = atomic64_xchg(&event->hw.prev_count, now); |
3874 | atomic64_set(&event->hw.prev_count, now); | ||
3875 | atomic64_add(now - prev, &event->count); | 4084 | atomic64_add(now - prev, &event->count); |
3876 | } | 4085 | } |
3877 | 4086 | ||
@@ -3881,22 +4090,14 @@ static int cpu_clock_perf_event_enable(struct perf_event *event) | |||
3881 | int cpu = raw_smp_processor_id(); | 4090 | int cpu = raw_smp_processor_id(); |
3882 | 4091 | ||
3883 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); | 4092 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); |
3884 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4093 | perf_swevent_start_hrtimer(event); |
3885 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
3886 | if (hwc->sample_period) { | ||
3887 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3888 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3889 | ns_to_ktime(period), 0, | ||
3890 | HRTIMER_MODE_REL, 0); | ||
3891 | } | ||
3892 | 4094 | ||
3893 | return 0; | 4095 | return 0; |
3894 | } | 4096 | } |
3895 | 4097 | ||
3896 | static void cpu_clock_perf_event_disable(struct perf_event *event) | 4098 | static void cpu_clock_perf_event_disable(struct perf_event *event) |
3897 | { | 4099 | { |
3898 | if (event->hw.sample_period) | 4100 | perf_swevent_cancel_hrtimer(event); |
3899 | hrtimer_cancel(&event->hw.hrtimer); | ||
3900 | cpu_clock_perf_event_update(event); | 4101 | cpu_clock_perf_event_update(event); |
3901 | } | 4102 | } |
3902 | 4103 | ||
@@ -3933,22 +4134,15 @@ static int task_clock_perf_event_enable(struct perf_event *event) | |||
3933 | now = event->ctx->time; | 4134 | now = event->ctx->time; |
3934 | 4135 | ||
3935 | atomic64_set(&hwc->prev_count, now); | 4136 | atomic64_set(&hwc->prev_count, now); |
3936 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4137 | |
3937 | hwc->hrtimer.function = perf_swevent_hrtimer; | 4138 | perf_swevent_start_hrtimer(event); |
3938 | if (hwc->sample_period) { | ||
3939 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3940 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3941 | ns_to_ktime(period), 0, | ||
3942 | HRTIMER_MODE_REL, 0); | ||
3943 | } | ||
3944 | 4139 | ||
3945 | return 0; | 4140 | return 0; |
3946 | } | 4141 | } |
3947 | 4142 | ||
3948 | static void task_clock_perf_event_disable(struct perf_event *event) | 4143 | static void task_clock_perf_event_disable(struct perf_event *event) |
3949 | { | 4144 | { |
3950 | if (event->hw.sample_period) | 4145 | perf_swevent_cancel_hrtimer(event); |
3951 | hrtimer_cancel(&event->hw.hrtimer); | ||
3952 | task_clock_perf_event_update(event, event->ctx->time); | 4146 | task_clock_perf_event_update(event, event->ctx->time); |
3953 | 4147 | ||
3954 | } | 4148 | } |
@@ -3976,6 +4170,7 @@ static const struct pmu perf_ops_task_clock = { | |||
3976 | }; | 4170 | }; |
3977 | 4171 | ||
3978 | #ifdef CONFIG_EVENT_PROFILE | 4172 | #ifdef CONFIG_EVENT_PROFILE |
4173 | |||
3979 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4174 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
3980 | int entry_size) | 4175 | int entry_size) |
3981 | { | 4176 | { |
@@ -3994,13 +4189,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | |||
3994 | if (!regs) | 4189 | if (!regs) |
3995 | regs = task_pt_regs(current); | 4190 | regs = task_pt_regs(current); |
3996 | 4191 | ||
4192 | /* Trace events already protected against recursion */ | ||
3997 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, | 4193 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, |
3998 | &data, regs); | 4194 | &data, regs); |
3999 | } | 4195 | } |
4000 | EXPORT_SYMBOL_GPL(perf_tp_event); | 4196 | EXPORT_SYMBOL_GPL(perf_tp_event); |
4001 | 4197 | ||
4002 | extern int ftrace_profile_enable(int); | 4198 | static int perf_tp_event_match(struct perf_event *event, |
4003 | extern void ftrace_profile_disable(int); | 4199 | struct perf_sample_data *data) |
4200 | { | ||
4201 | void *record = data->raw->data; | ||
4202 | |||
4203 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4204 | return 1; | ||
4205 | return 0; | ||
4206 | } | ||
4004 | 4207 | ||
4005 | static void tp_perf_event_destroy(struct perf_event *event) | 4208 | static void tp_perf_event_destroy(struct perf_event *event) |
4006 | { | 4209 | { |
@@ -4025,11 +4228,93 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4025 | 4228 | ||
4026 | return &perf_ops_generic; | 4229 | return &perf_ops_generic; |
4027 | } | 4230 | } |
4231 | |||
4232 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||
4233 | { | ||
4234 | char *filter_str; | ||
4235 | int ret; | ||
4236 | |||
4237 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
4238 | return -EINVAL; | ||
4239 | |||
4240 | filter_str = strndup_user(arg, PAGE_SIZE); | ||
4241 | if (IS_ERR(filter_str)) | ||
4242 | return PTR_ERR(filter_str); | ||
4243 | |||
4244 | ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); | ||
4245 | |||
4246 | kfree(filter_str); | ||
4247 | return ret; | ||
4248 | } | ||
4249 | |||
4250 | static void perf_event_free_filter(struct perf_event *event) | ||
4251 | { | ||
4252 | ftrace_profile_free_filter(event); | ||
4253 | } | ||
4254 | |||
4028 | #else | 4255 | #else |
4256 | |||
4257 | static int perf_tp_event_match(struct perf_event *event, | ||
4258 | struct perf_sample_data *data) | ||
4259 | { | ||
4260 | return 1; | ||
4261 | } | ||
4262 | |||
4029 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4263 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
4030 | { | 4264 | { |
4031 | return NULL; | 4265 | return NULL; |
4032 | } | 4266 | } |
4267 | |||
4268 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||
4269 | { | ||
4270 | return -ENOENT; | ||
4271 | } | ||
4272 | |||
4273 | static void perf_event_free_filter(struct perf_event *event) | ||
4274 | { | ||
4275 | } | ||
4276 | |||
4277 | #endif /* CONFIG_EVENT_PROFILE */ | ||
4278 | |||
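perf_event_set_filter() above receives a user pointer (note strndup_user()), presumably from the perf ioctl path, and hands the string to ftrace_profile_set_filter(); perf_tp_event_match() then drops samples whose raw record does not satisfy the predicates. A hedged userspace sketch, assuming the fd comes from perf_event_open() on a PERF_TYPE_TRACEPOINT event and that the ioctl command is exposed as PERF_EVENT_IOC_SET_FILTER (defined elsewhere in this series, not in this hunk); the filter string is just an example of the ftrace predicate syntax:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/perf_event.h>

    /* fd: an already-open PERF_TYPE_TRACEPOINT event descriptor. */
    static int set_tracepoint_filter(int fd, const char *filter)
    {
            if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter) < 0) {
                    perror("PERF_EVENT_IOC_SET_FILTER");
                    return -1;
            }
            return 0;
    }

    /* e.g. set_tracepoint_filter(fd, "common_pid != 0"); */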
4279 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
4280 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4281 | { | ||
4282 | release_bp_slot(event); | ||
4283 | } | ||
4284 | |||
4285 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4286 | { | ||
4287 | int err; | ||
4288 | |||
4289 | err = register_perf_hw_breakpoint(bp); | ||
4290 | if (err) | ||
4291 | return ERR_PTR(err); | ||
4292 | |||
4293 | bp->destroy = bp_perf_event_destroy; | ||
4294 | |||
4295 | return &perf_ops_bp; | ||
4296 | } | ||
4297 | |||
4298 | void perf_bp_event(struct perf_event *bp, void *data) | ||
4299 | { | ||
4300 | struct perf_sample_data sample; | ||
4301 | struct pt_regs *regs = data; | ||
4302 | |||
4303 | sample.raw = NULL; | ||
4304 | sample.addr = bp->attr.bp_addr; | ||
4305 | |||
4306 | if (!perf_exclude_event(bp, regs)) | ||
4307 | perf_swevent_add(bp, 1, 1, &sample, regs); | ||
4308 | } | ||
4309 | #else | ||
4310 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4311 | { | ||
4312 | return NULL; | ||
4313 | } | ||
4314 | |||
4315 | void perf_bp_event(struct perf_event *bp, void *regs) | ||
4316 | { | ||
4317 | } | ||
4033 | #endif | 4318 | #endif |
4034 | 4319 | ||
4035 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4320 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
@@ -4076,6 +4361,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) | |||
4076 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | 4361 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: |
4077 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | 4362 | case PERF_COUNT_SW_CONTEXT_SWITCHES: |
4078 | case PERF_COUNT_SW_CPU_MIGRATIONS: | 4363 | case PERF_COUNT_SW_CPU_MIGRATIONS: |
4364 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | ||
4365 | case PERF_COUNT_SW_EMULATION_FAULTS: | ||
4079 | if (!event->parent) { | 4366 | if (!event->parent) { |
4080 | atomic_inc(&perf_swevent_enabled[event_id]); | 4367 | atomic_inc(&perf_swevent_enabled[event_id]); |
4081 | event->destroy = sw_perf_event_destroy; | 4368 | event->destroy = sw_perf_event_destroy; |
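With the two new ids wired into the switch above, PERF_COUNT_SW_ALIGNMENT_FAULTS and PERF_COUNT_SW_EMULATION_FAULTS are handled by the same software pmu as the existing software events. A minimal userspace sketch counting one of them over a workload; the raw syscall() form is used because glibc provides no wrapper, and error handling is kept to the essentials:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t count = 0;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size   = sizeof(attr);
            attr.type   = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_EMULATION_FAULTS;

            /* current task, any cpu, no group leader, no flags */
            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }

            /* ... run the workload to be measured ... */

            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("emulation faults: %llu\n",
                           (unsigned long long)count);
            close(fd);
            return 0;
    }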
@@ -4096,6 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4096 | struct perf_event_context *ctx, | 4383 | struct perf_event_context *ctx, |
4097 | struct perf_event *group_leader, | 4384 | struct perf_event *group_leader, |
4098 | struct perf_event *parent_event, | 4385 | struct perf_event *parent_event, |
4386 | perf_overflow_handler_t overflow_handler, | ||
4099 | gfp_t gfpflags) | 4387 | gfp_t gfpflags) |
4100 | { | 4388 | { |
4101 | const struct pmu *pmu; | 4389 | const struct pmu *pmu; |
@@ -4138,6 +4426,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4138 | 4426 | ||
4139 | event->state = PERF_EVENT_STATE_INACTIVE; | 4427 | event->state = PERF_EVENT_STATE_INACTIVE; |
4140 | 4428 | ||
4429 | if (!overflow_handler && parent_event) | ||
4430 | overflow_handler = parent_event->overflow_handler; | ||
4431 | |||
4432 | event->overflow_handler = overflow_handler; | ||
4433 | |||
4141 | if (attr->disabled) | 4434 | if (attr->disabled) |
4142 | event->state = PERF_EVENT_STATE_OFF; | 4435 | event->state = PERF_EVENT_STATE_OFF; |
4143 | 4436 | ||
@@ -4172,6 +4465,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4172 | pmu = tp_perf_event_init(event); | 4465 | pmu = tp_perf_event_init(event); |
4173 | break; | 4466 | break; |
4174 | 4467 | ||
4468 | case PERF_TYPE_BREAKPOINT: | ||
4469 | pmu = bp_perf_event_init(event); | ||
4470 | break; | ||
4471 | |||
4472 | |||
4175 | default: | 4473 | default: |
4176 | break; | 4474 | break; |
4177 | } | 4475 | } |
@@ -4284,7 +4582,7 @@ err_size: | |||
4284 | goto out; | 4582 | goto out; |
4285 | } | 4583 | } |
4286 | 4584 | ||
4287 | int perf_event_set_output(struct perf_event *event, int output_fd) | 4585 | static int perf_event_set_output(struct perf_event *event, int output_fd) |
4288 | { | 4586 | { |
4289 | struct perf_event *output_event = NULL; | 4587 | struct perf_event *output_event = NULL; |
4290 | struct file *output_file = NULL; | 4588 | struct file *output_file = NULL; |
@@ -4414,7 +4712,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
4414 | } | 4712 | } |
4415 | 4713 | ||
4416 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | 4714 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
4417 | NULL, GFP_KERNEL); | 4715 | NULL, NULL, GFP_KERNEL); |
4418 | err = PTR_ERR(event); | 4716 | err = PTR_ERR(event); |
4419 | if (IS_ERR(event)) | 4717 | if (IS_ERR(event)) |
4420 | goto err_put_context; | 4718 | goto err_put_context; |
@@ -4462,6 +4760,61 @@ err_put_context: | |||
4462 | return err; | 4760 | return err; |
4463 | } | 4761 | } |
4464 | 4762 | ||
4763 | /** | ||
4764 | * perf_event_create_kernel_counter | ||
4765 | * | ||
4766 | * @attr: attributes of the counter to create | ||
4767 | * @cpu: cpu in which the counter is bound | ||
4768 | * @pid: task to profile | ||
4769 | */ | ||
4770 | struct perf_event * | ||
4771 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | ||
4772 | pid_t pid, | ||
4773 | perf_overflow_handler_t overflow_handler) | ||
4774 | { | ||
4775 | struct perf_event *event; | ||
4776 | struct perf_event_context *ctx; | ||
4777 | int err; | ||
4778 | |||
4779 | /* | ||
4780 | * Get the target context (task or percpu): | ||
4781 | */ | ||
4782 | |||
4783 | ctx = find_get_context(pid, cpu); | ||
4784 | if (IS_ERR(ctx)) { | ||
4785 | err = PTR_ERR(ctx); | ||
4786 | goto err_exit; | ||
4787 | } | ||
4788 | |||
4789 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
4790 | NULL, overflow_handler, GFP_KERNEL); | ||
4791 | if (IS_ERR(event)) { | ||
4792 | err = PTR_ERR(event); | ||
4793 | goto err_put_context; | ||
4794 | } | ||
4795 | |||
4796 | event->filp = NULL; | ||
4797 | WARN_ON_ONCE(ctx->parent_ctx); | ||
4798 | mutex_lock(&ctx->mutex); | ||
4799 | perf_install_in_context(ctx, event, cpu); | ||
4800 | ++ctx->generation; | ||
4801 | mutex_unlock(&ctx->mutex); | ||
4802 | |||
4803 | event->owner = current; | ||
4804 | get_task_struct(current); | ||
4805 | mutex_lock(¤t->perf_event_mutex); | ||
4806 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
4807 | mutex_unlock(¤t->perf_event_mutex); | ||
4808 | |||
4809 | return event; | ||
4810 | |||
4811 | err_put_context: | ||
4812 | put_ctx(ctx); | ||
4813 | err_exit: | ||
4814 | return ERR_PTR(err); | ||
4815 | } | ||
4816 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | ||
4817 | |||
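perf_event_create_kernel_counter() lets in-kernel users (such as the hw_breakpoint support added above) set up a counter directly, with an optional overflow callback, instead of going through the syscall. A hedged sketch of a kernel-side data-write breakpoint: it assumes the perf_overflow_handler_t signature of this series and the HW_BREAKPOINT_* constants and bp_* attribute fields from <linux/hw_breakpoint.h>; the names watched_value, my_bp and my_bp_handler are illustrative only. Per the kernel-doc above, cpu = -1 with a task pid binds the counter to that task on any CPU; teardown is omitted:

    #include <linux/err.h>
    #include <linux/kernel.h>
    #include <linux/sched.h>
    #include <linux/perf_event.h>
    #include <linux/hw_breakpoint.h>

    static int watched_value;               /* variable to watch */
    static struct perf_event *my_bp;

    static void my_bp_handler(struct perf_event *bp, int nmi,
                              struct perf_sample_data *data,
                              struct pt_regs *regs)
    {
            pr_info("watched_value was written to\n");
    }

    static int my_bp_setup(void)
    {
            struct perf_event_attr attr = {
                    .type           = PERF_TYPE_BREAKPOINT,
                    .size           = sizeof(attr),
                    .bp_addr        = (unsigned long)&watched_value,
                    .bp_len         = HW_BREAKPOINT_LEN_4,
                    .bp_type        = HW_BREAKPOINT_W,
                    .sample_period  = 1,
            };

            my_bp = perf_event_create_kernel_counter(&attr, -1, current->pid,
                                                     my_bp_handler);
            if (IS_ERR(my_bp))
                    return PTR_ERR(my_bp);
            return 0;
    }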
4465 | /* | 4818 | /* |
4466 | * inherit a event from parent task to child task: | 4819 | * inherit a event from parent task to child task: |
4467 | */ | 4820 | */ |
@@ -4487,7 +4840,7 @@ inherit_event(struct perf_event *parent_event, | |||
4487 | child_event = perf_event_alloc(&parent_event->attr, | 4840 | child_event = perf_event_alloc(&parent_event->attr, |
4488 | parent_event->cpu, child_ctx, | 4841 | parent_event->cpu, child_ctx, |
4489 | group_leader, parent_event, | 4842 | group_leader, parent_event, |
4490 | GFP_KERNEL); | 4843 | NULL, GFP_KERNEL); |
4491 | if (IS_ERR(child_event)) | 4844 | if (IS_ERR(child_event)) |
4492 | return child_event; | 4845 | return child_event; |
4493 | get_ctx(child_ctx); | 4846 | get_ctx(child_ctx); |
@@ -4505,6 +4858,8 @@ inherit_event(struct perf_event *parent_event, | |||
4505 | if (parent_event->attr.freq) | 4858 | if (parent_event->attr.freq) |
4506 | child_event->hw.sample_period = parent_event->hw.sample_period; | 4859 | child_event->hw.sample_period = parent_event->hw.sample_period; |
4507 | 4860 | ||
4861 | child_event->overflow_handler = parent_event->overflow_handler; | ||
4862 | |||
4508 | /* | 4863 | /* |
4509 | * Link it up in the child's context: | 4864 | * Link it up in the child's context: |
4510 | */ | 4865 | */ |
@@ -4594,7 +4949,6 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
4594 | { | 4949 | { |
4595 | struct perf_event *parent_event; | 4950 | struct perf_event *parent_event; |
4596 | 4951 | ||
4597 | update_event_times(child_event); | ||
4598 | perf_event_remove_from_context(child_event); | 4952 | perf_event_remove_from_context(child_event); |
4599 | 4953 | ||
4600 | parent_event = child_event->parent; | 4954 | parent_event = child_event->parent; |
@@ -4646,6 +5000,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
4646 | * the events from it. | 5000 | * the events from it. |
4647 | */ | 5001 | */ |
4648 | unclone_ctx(child_ctx); | 5002 | unclone_ctx(child_ctx); |
5003 | update_context_time(child_ctx); | ||
4649 | spin_unlock_irqrestore(&child_ctx->lock, flags); | 5004 | spin_unlock_irqrestore(&child_ctx->lock, flags); |
4650 | 5005 | ||
4651 | /* | 5006 | /* |
@@ -4729,7 +5084,7 @@ again: | |||
4729 | */ | 5084 | */ |
4730 | int perf_event_init_task(struct task_struct *child) | 5085 | int perf_event_init_task(struct task_struct *child) |
4731 | { | 5086 | { |
4732 | struct perf_event_context *child_ctx, *parent_ctx; | 5087 | struct perf_event_context *child_ctx = NULL, *parent_ctx; |
4733 | struct perf_event_context *cloned_ctx; | 5088 | struct perf_event_context *cloned_ctx; |
4734 | struct perf_event *event; | 5089 | struct perf_event *event; |
4735 | struct task_struct *parent = current; | 5090 | struct task_struct *parent = current; |
@@ -4745,20 +5100,6 @@ int perf_event_init_task(struct task_struct *child) | |||
4745 | return 0; | 5100 | return 0; |
4746 | 5101 | ||
4747 | /* | 5102 | /* |
4748 | * This is executed from the parent task context, so inherit | ||
4749 | * events that have been marked for cloning. | ||
4750 | * First allocate and initialize a context for the child. | ||
4751 | */ | ||
4752 | |||
4753 | child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | ||
4754 | if (!child_ctx) | ||
4755 | return -ENOMEM; | ||
4756 | |||
4757 | __perf_event_init_context(child_ctx, child); | ||
4758 | child->perf_event_ctxp = child_ctx; | ||
4759 | get_task_struct(child); | ||
4760 | |||
4761 | /* | ||
4762 | * If the parent's context is a clone, pin it so it won't get | 5103 | * If the parent's context is a clone, pin it so it won't get |
4763 | * swapped under us. | 5104 | * swapped under us. |
4764 | */ | 5105 | */ |
@@ -4781,15 +5122,33 @@ int perf_event_init_task(struct task_struct *child) | |||
4781 | * We dont have to disable NMIs - we are only looking at | 5122 | * We dont have to disable NMIs - we are only looking at |
4782 | * the list, not manipulating it: | 5123 | * the list, not manipulating it: |
4783 | */ | 5124 | */ |
4784 | list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) { | 5125 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { |
4785 | if (event != event->group_leader) | ||
4786 | continue; | ||
4787 | 5126 | ||
4788 | if (!event->attr.inherit) { | 5127 | if (!event->attr.inherit) { |
4789 | inherited_all = 0; | 5128 | inherited_all = 0; |
4790 | continue; | 5129 | continue; |
4791 | } | 5130 | } |
4792 | 5131 | ||
5132 | if (!child->perf_event_ctxp) { | ||
5133 | /* | ||
5134 | * This is executed from the parent task context, so | ||
5135 | * inherit events that have been marked for cloning. | ||
5136 | * First allocate and initialize a context for the | ||
5137 | * child. | ||
5138 | */ | ||
5139 | |||
5140 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
5141 | GFP_KERNEL); | ||
5142 | if (!child_ctx) { | ||
5143 | ret = -ENOMEM; | ||
5144 | goto exit; | ||
5145 | } | ||
5146 | |||
5147 | __perf_event_init_context(child_ctx, child); | ||
5148 | child->perf_event_ctxp = child_ctx; | ||
5149 | get_task_struct(child); | ||
5150 | } | ||
5151 | |||
4793 | ret = inherit_group(event, parent, parent_ctx, | 5152 | ret = inherit_group(event, parent, parent_ctx, |
4794 | child, child_ctx); | 5153 | child, child_ctx); |
4795 | if (ret) { | 5154 | if (ret) { |
@@ -4818,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child) | |||
4818 | get_ctx(child_ctx->parent_ctx); | 5177 | get_ctx(child_ctx->parent_ctx); |
4819 | } | 5178 | } |
4820 | 5179 | ||
5180 | exit: | ||
4821 | mutex_unlock(&parent_ctx->mutex); | 5181 | mutex_unlock(&parent_ctx->mutex); |
4822 | 5182 | ||
4823 | perf_unpin_context(parent_ctx); | 5183 | perf_unpin_context(parent_ctx); |