Diffstat (limited to 'kernel/perf_event.c')
 -rw-r--r--   kernel/perf_event.c   965
 1 file changed, 662 insertions(+), 303 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 0f86feb6db0c..6b7ddba1dd64 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
22 | #include <linux/vmstat.h> | 22 | #include <linux/vmstat.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
24 | #include <linux/rculist.h> | 25 | #include <linux/rculist.h> |
25 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
@@ -27,6 +28,8 @@ | |||
27 | #include <linux/anon_inodes.h> | 28 | #include <linux/anon_inodes.h> |
28 | #include <linux/kernel_stat.h> | 29 | #include <linux/kernel_stat.h> |
29 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | #include <linux/ftrace_event.h> | ||
32 | #include <linux/hw_breakpoint.h> | ||
30 | 33 | ||
31 | #include <asm/irq_regs.h> | 34 | #include <asm/irq_regs.h> |
32 | 35 | ||
@@ -243,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
243 | put_ctx(ctx); | 246 | put_ctx(ctx); |
244 | } | 247 | } |
245 | 248 | ||
249 | static inline u64 perf_clock(void) | ||
250 | { | ||
251 | return cpu_clock(smp_processor_id()); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Update the record of the current time in a context. | ||
256 | */ | ||
257 | static void update_context_time(struct perf_event_context *ctx) | ||
258 | { | ||
259 | u64 now = perf_clock(); | ||
260 | |||
261 | ctx->time += now - ctx->timestamp; | ||
262 | ctx->timestamp = now; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Update the total_time_enabled and total_time_running fields for a event. | ||
267 | */ | ||
268 | static void update_event_times(struct perf_event *event) | ||
269 | { | ||
270 | struct perf_event_context *ctx = event->ctx; | ||
271 | u64 run_end; | ||
272 | |||
273 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
274 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
275 | return; | ||
276 | |||
277 | if (ctx->is_active) | ||
278 | run_end = ctx->time; | ||
279 | else | ||
280 | run_end = event->tstamp_stopped; | ||
281 | |||
282 | event->total_time_enabled = run_end - event->tstamp_enabled; | ||
283 | |||
284 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
285 | run_end = event->tstamp_stopped; | ||
286 | else | ||
287 | run_end = ctx->time; | ||
288 | |||
289 | event->total_time_running = run_end - event->tstamp_running; | ||
290 | } | ||
291 | |||
246 | /* | 292 | /* |
247 | * Add a event from the lists for its context. | 293 | * Add a event from the lists for its context. |
248 | * Must be called with ctx->mutex and ctx->lock held. | 294 | * Must be called with ctx->mutex and ctx->lock held. |
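The relocated update_event_times() is easiest to follow with concrete numbers. A worked example with made-up timestamps, tracing the two assignments to run_end above:

	/*
	 * Hypothetical timestamps (ns), event currently INACTIVE in an
	 * active context:
	 *
	 *   tstamp_enabled = 100, tstamp_running = 120, tstamp_stopped = 150
	 *   ctx->time      = 180
	 *
	 *   run_end            = ctx->time       = 180
	 *   total_time_enabled = 180 - 100       =  80
	 *   run_end            = tstamp_stopped  = 150
	 *   total_time_running = 150 - 120       =  30
	 */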
@@ -291,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
291 | if (event->group_leader != event) | 337 | if (event->group_leader != event) |
292 | event->group_leader->nr_siblings--; | 338 | event->group_leader->nr_siblings--; |
293 | 339 | ||
340 | update_event_times(event); | ||
341 | |||
342 | /* | ||
343 | * If event was in error state, then keep it | ||
344 | * that way, otherwise bogus counts will be | ||
345 | * returned on read(). The only way to get out | ||
346 | * of error state is by explicit re-enabling | ||
347 | * of the event | ||
348 | */ | ||
349 | if (event->state > PERF_EVENT_STATE_OFF) | ||
350 | event->state = PERF_EVENT_STATE_OFF; | ||
351 | |||
294 | /* | 352 | /* |
295 | * If this was a group event with sibling events then | 353 | * If this was a group event with sibling events then |
296 | * upgrade the siblings to singleton events by adding them | 354 | * upgrade the siblings to singleton events by adding them |
@@ -444,50 +502,11 @@ retry: | |||
444 | * can remove the event safely, if the call above did not | 502 | * can remove the event safely, if the call above did not |
445 | * succeed. | 503 | * succeed. |
446 | */ | 504 | */ |
447 | if (!list_empty(&event->group_entry)) { | 505 | if (!list_empty(&event->group_entry)) |
448 | list_del_event(event, ctx); | 506 | list_del_event(event, ctx); |
449 | } | ||
450 | spin_unlock_irq(&ctx->lock); | 507 | spin_unlock_irq(&ctx->lock); |
451 | } | 508 | } |
452 | 509 | ||
453 | static inline u64 perf_clock(void) | ||
454 | { | ||
455 | return cpu_clock(smp_processor_id()); | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * Update the record of the current time in a context. | ||
460 | */ | ||
461 | static void update_context_time(struct perf_event_context *ctx) | ||
462 | { | ||
463 | u64 now = perf_clock(); | ||
464 | |||
465 | ctx->time += now - ctx->timestamp; | ||
466 | ctx->timestamp = now; | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * Update the total_time_enabled and total_time_running fields for a event. | ||
471 | */ | ||
472 | static void update_event_times(struct perf_event *event) | ||
473 | { | ||
474 | struct perf_event_context *ctx = event->ctx; | ||
475 | u64 run_end; | ||
476 | |||
477 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
478 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
479 | return; | ||
480 | |||
481 | event->total_time_enabled = ctx->time - event->tstamp_enabled; | ||
482 | |||
483 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
484 | run_end = event->tstamp_stopped; | ||
485 | else | ||
486 | run_end = ctx->time; | ||
487 | |||
488 | event->total_time_running = run_end - event->tstamp_running; | ||
489 | } | ||
490 | |||
491 | /* | 510 | /* |
492 | * Update total_time_enabled and total_time_running for all events in a group. | 511 | * Update total_time_enabled and total_time_running for all events in a group. |
493 | */ | 512 | */ |
@@ -1031,12 +1050,8 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1031 | 1050 | ||
1032 | perf_disable(); | 1051 | perf_disable(); |
1033 | if (ctx->nr_active) { | 1052 | if (ctx->nr_active) { |
1034 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1053 | list_for_each_entry(event, &ctx->group_list, group_entry) |
1035 | if (event != event->group_leader) | 1054 | group_sched_out(event, cpuctx, ctx); |
1036 | event_sched_out(event, cpuctx, ctx); | ||
1037 | else | ||
1038 | group_sched_out(event, cpuctx, ctx); | ||
1039 | } | ||
1040 | } | 1055 | } |
1041 | perf_enable(); | 1056 | perf_enable(); |
1042 | out: | 1057 | out: |
@@ -1062,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1, | |||
1062 | && !ctx1->pin_count && !ctx2->pin_count; | 1077 | && !ctx1->pin_count && !ctx2->pin_count; |
1063 | } | 1078 | } |
1064 | 1079 | ||
1065 | static void __perf_event_read(void *event); | ||
1066 | |||
1067 | static void __perf_event_sync_stat(struct perf_event *event, | 1080 | static void __perf_event_sync_stat(struct perf_event *event, |
1068 | struct perf_event *next_event) | 1081 | struct perf_event *next_event) |
1069 | { | 1082 | { |
@@ -1081,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event, | |||
1081 | */ | 1094 | */ |
1082 | switch (event->state) { | 1095 | switch (event->state) { |
1083 | case PERF_EVENT_STATE_ACTIVE: | 1096 | case PERF_EVENT_STATE_ACTIVE: |
1084 | __perf_event_read(event); | 1097 | event->pmu->read(event); |
1085 | break; | 1098 | /* fall-through */ |
1086 | 1099 | ||
1087 | case PERF_EVENT_STATE_INACTIVE: | 1100 | case PERF_EVENT_STATE_INACTIVE: |
1088 | update_event_times(event); | 1101 | update_event_times(event); |
@@ -1121,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1121 | if (!ctx->nr_stat) | 1134 | if (!ctx->nr_stat) |
1122 | return; | 1135 | return; |
1123 | 1136 | ||
1137 | update_context_time(ctx); | ||
1138 | |||
1124 | event = list_first_entry(&ctx->event_list, | 1139 | event = list_first_entry(&ctx->event_list, |
1125 | struct perf_event, event_entry); | 1140 | struct perf_event, event_entry); |
1126 | 1141 | ||
@@ -1164,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1164 | if (likely(!ctx || !cpuctx->task_ctx)) | 1179 | if (likely(!ctx || !cpuctx->task_ctx)) |
1165 | return; | 1180 | return; |
1166 | 1181 | ||
1167 | update_context_time(ctx); | ||
1168 | |||
1169 | rcu_read_lock(); | 1182 | rcu_read_lock(); |
1170 | parent = rcu_dereference(ctx->parent_ctx); | 1183 | parent = rcu_dereference(ctx->parent_ctx); |
1171 | next_ctx = next->perf_event_ctxp; | 1184 | next_ctx = next->perf_event_ctxp; |
@@ -1258,12 +1271,8 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1258 | if (event->cpu != -1 && event->cpu != cpu) | 1271 | if (event->cpu != -1 && event->cpu != cpu) |
1259 | continue; | 1272 | continue; |
1260 | 1273 | ||
1261 | if (event != event->group_leader) | 1274 | if (group_can_go_on(event, cpuctx, 1)) |
1262 | event_sched_in(event, cpuctx, ctx, cpu); | 1275 | group_sched_in(event, cpuctx, ctx, cpu); |
1263 | else { | ||
1264 | if (group_can_go_on(event, cpuctx, 1)) | ||
1265 | group_sched_in(event, cpuctx, ctx, cpu); | ||
1266 | } | ||
1267 | 1276 | ||
1268 | /* | 1277 | /* |
1269 | * If this pinned group hasn't been scheduled, | 1278 | * If this pinned group hasn't been scheduled, |
@@ -1291,15 +1300,9 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1291 | if (event->cpu != -1 && event->cpu != cpu) | 1300 | if (event->cpu != -1 && event->cpu != cpu) |
1292 | continue; | 1301 | continue; |
1293 | 1302 | ||
1294 | if (event != event->group_leader) { | 1303 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
1295 | if (event_sched_in(event, cpuctx, ctx, cpu)) | 1304 | if (group_sched_in(event, cpuctx, ctx, cpu)) |
1296 | can_add_hw = 0; | 1305 | can_add_hw = 0; |
1297 | } else { | ||
1298 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | ||
1299 | if (group_sched_in(event, cpuctx, ctx, cpu)) | ||
1300 | can_add_hw = 0; | ||
1301 | } | ||
1302 | } | ||
1303 | } | 1306 | } |
1304 | perf_enable(); | 1307 | perf_enable(); |
1305 | out: | 1308 | out: |
@@ -1368,7 +1371,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1368 | u64 interrupts, freq; | 1371 | u64 interrupts, freq; |
1369 | 1372 | ||
1370 | spin_lock(&ctx->lock); | 1373 | spin_lock(&ctx->lock); |
1371 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1374 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
1372 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1375 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
1373 | continue; | 1376 | continue; |
1374 | 1377 | ||
@@ -1528,7 +1531,6 @@ static void __perf_event_read(void *info) | |||
1528 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1531 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1529 | struct perf_event *event = info; | 1532 | struct perf_event *event = info; |
1530 | struct perf_event_context *ctx = event->ctx; | 1533 | struct perf_event_context *ctx = event->ctx; |
1531 | unsigned long flags; | ||
1532 | 1534 | ||
1533 | /* | 1535 | /* |
1534 | * If this is a task context, we need to check whether it is | 1536 | * If this is a task context, we need to check whether it is |
@@ -1540,12 +1542,12 @@ static void __perf_event_read(void *info) | |||
1540 | if (ctx->task && cpuctx->task_ctx != ctx) | 1542 | if (ctx->task && cpuctx->task_ctx != ctx) |
1541 | return; | 1543 | return; |
1542 | 1544 | ||
1543 | local_irq_save(flags); | 1545 | spin_lock(&ctx->lock); |
1544 | if (ctx->is_active) | 1546 | update_context_time(ctx); |
1545 | update_context_time(ctx); | ||
1546 | event->pmu->read(event); | ||
1547 | update_event_times(event); | 1547 | update_event_times(event); |
1548 | local_irq_restore(flags); | 1548 | spin_unlock(&ctx->lock); |
1549 | |||
1550 | event->pmu->read(event); | ||
1549 | } | 1551 | } |
1550 | 1552 | ||
1551 | static u64 perf_event_read(struct perf_event *event) | 1553 | static u64 perf_event_read(struct perf_event *event) |
@@ -1558,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event) | |||
1558 | smp_call_function_single(event->oncpu, | 1560 | smp_call_function_single(event->oncpu, |
1559 | __perf_event_read, event, 1); | 1561 | __perf_event_read, event, 1); |
1560 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { | 1562 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { |
1563 | struct perf_event_context *ctx = event->ctx; | ||
1564 | unsigned long flags; | ||
1565 | |||
1566 | spin_lock_irqsave(&ctx->lock, flags); | ||
1567 | update_context_time(ctx); | ||
1561 | update_event_times(event); | 1568 | update_event_times(event); |
1569 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
1562 | } | 1570 | } |
1563 | 1571 | ||
1564 | return atomic64_read(&event->count); | 1572 | return atomic64_read(&event->count); |
@@ -1671,6 +1679,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1671 | return ERR_PTR(err); | 1679 | return ERR_PTR(err); |
1672 | } | 1680 | } |
1673 | 1681 | ||
1682 | static void perf_event_free_filter(struct perf_event *event); | ||
1683 | |||
1674 | static void free_event_rcu(struct rcu_head *head) | 1684 | static void free_event_rcu(struct rcu_head *head) |
1675 | { | 1685 | { |
1676 | struct perf_event *event; | 1686 | struct perf_event *event; |
@@ -1678,6 +1688,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
1678 | event = container_of(head, struct perf_event, rcu_head); | 1688 | event = container_of(head, struct perf_event, rcu_head); |
1679 | if (event->ns) | 1689 | if (event->ns) |
1680 | put_pid_ns(event->ns); | 1690 | put_pid_ns(event->ns); |
1691 | perf_event_free_filter(event); | ||
1681 | kfree(event); | 1692 | kfree(event); |
1682 | } | 1693 | } |
1683 | 1694 | ||
@@ -1709,16 +1720,10 @@ static void free_event(struct perf_event *event) | |||
1709 | call_rcu(&event->rcu_head, free_event_rcu); | 1720 | call_rcu(&event->rcu_head, free_event_rcu); |
1710 | } | 1721 | } |
1711 | 1722 | ||
1712 | /* | 1723 | int perf_event_release_kernel(struct perf_event *event) |
1713 | * Called when the last reference to the file is gone. | ||
1714 | */ | ||
1715 | static int perf_release(struct inode *inode, struct file *file) | ||
1716 | { | 1724 | { |
1717 | struct perf_event *event = file->private_data; | ||
1718 | struct perf_event_context *ctx = event->ctx; | 1725 | struct perf_event_context *ctx = event->ctx; |
1719 | 1726 | ||
1720 | file->private_data = NULL; | ||
1721 | |||
1722 | WARN_ON_ONCE(ctx->parent_ctx); | 1727 | WARN_ON_ONCE(ctx->parent_ctx); |
1723 | mutex_lock(&ctx->mutex); | 1728 | mutex_lock(&ctx->mutex); |
1724 | perf_event_remove_from_context(event); | 1729 | perf_event_remove_from_context(event); |
@@ -1733,6 +1738,19 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1733 | 1738 | ||
1734 | return 0; | 1739 | return 0; |
1735 | } | 1740 | } |
1741 | EXPORT_SYMBOL_GPL(perf_event_release_kernel); | ||
1742 | |||
1743 | /* | ||
1744 | * Called when the last reference to the file is gone. | ||
1745 | */ | ||
1746 | static int perf_release(struct inode *inode, struct file *file) | ||
1747 | { | ||
1748 | struct perf_event *event = file->private_data; | ||
1749 | |||
1750 | file->private_data = NULL; | ||
1751 | |||
1752 | return perf_event_release_kernel(event); | ||
1753 | } | ||
1736 | 1754 | ||
1737 | static int perf_event_read_size(struct perf_event *event) | 1755 | static int perf_event_read_size(struct perf_event *event) |
1738 | { | 1756 | { |
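With perf_event_release_kernel() exported, in-kernel users can drop events they own without going through a file descriptor. A minimal teardown sketch; the event is assumed to have been created elsewhere (for instance by perf_event_create_kernel_counter(), whose exact signature is not shown in this patch):

	/* hypothetical in-kernel owner of a perf event */
	static struct perf_event *my_event;

	static void my_teardown(void)
	{
		if (my_event) {
			perf_event_release_kernel(my_event);
			my_event = NULL;
		}
	}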
@@ -1759,91 +1777,94 @@ static int perf_event_read_size(struct perf_event *event) | |||
1759 | return size; | 1777 | return size; |
1760 | } | 1778 | } |
1761 | 1779 | ||
1762 | static u64 perf_event_read_value(struct perf_event *event) | 1780 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
1763 | { | 1781 | { |
1764 | struct perf_event *child; | 1782 | struct perf_event *child; |
1765 | u64 total = 0; | 1783 | u64 total = 0; |
1766 | 1784 | ||
1785 | *enabled = 0; | ||
1786 | *running = 0; | ||
1787 | |||
1788 | mutex_lock(&event->child_mutex); | ||
1767 | total += perf_event_read(event); | 1789 | total += perf_event_read(event); |
1768 | list_for_each_entry(child, &event->child_list, child_list) | 1790 | *enabled += event->total_time_enabled + |
1791 | atomic64_read(&event->child_total_time_enabled); | ||
1792 | *running += event->total_time_running + | ||
1793 | atomic64_read(&event->child_total_time_running); | ||
1794 | |||
1795 | list_for_each_entry(child, &event->child_list, child_list) { | ||
1769 | total += perf_event_read(child); | 1796 | total += perf_event_read(child); |
1797 | *enabled += child->total_time_enabled; | ||
1798 | *running += child->total_time_running; | ||
1799 | } | ||
1800 | mutex_unlock(&event->child_mutex); | ||
1770 | 1801 | ||
1771 | return total; | 1802 | return total; |
1772 | } | 1803 | } |
1773 | 1804 | EXPORT_SYMBOL_GPL(perf_event_read_value); | |
1774 | static int perf_event_read_entry(struct perf_event *event, | ||
1775 | u64 read_format, char __user *buf) | ||
1776 | { | ||
1777 | int n = 0, count = 0; | ||
1778 | u64 values[2]; | ||
1779 | |||
1780 | values[n++] = perf_event_read_value(event); | ||
1781 | if (read_format & PERF_FORMAT_ID) | ||
1782 | values[n++] = primary_event_id(event); | ||
1783 | |||
1784 | count = n * sizeof(u64); | ||
1785 | |||
1786 | if (copy_to_user(buf, values, count)) | ||
1787 | return -EFAULT; | ||
1788 | |||
1789 | return count; | ||
1790 | } | ||
1791 | 1805 | ||
1792 | static int perf_event_read_group(struct perf_event *event, | 1806 | static int perf_event_read_group(struct perf_event *event, |
1793 | u64 read_format, char __user *buf) | 1807 | u64 read_format, char __user *buf) |
1794 | { | 1808 | { |
1795 | struct perf_event *leader = event->group_leader, *sub; | 1809 | struct perf_event *leader = event->group_leader, *sub; |
1796 | int n = 0, size = 0, err = -EFAULT; | 1810 | int n = 0, size = 0, ret = -EFAULT; |
1797 | u64 values[3]; | 1811 | struct perf_event_context *ctx = leader->ctx; |
1812 | u64 values[5]; | ||
1813 | u64 count, enabled, running; | ||
1814 | |||
1815 | mutex_lock(&ctx->mutex); | ||
1816 | count = perf_event_read_value(leader, &enabled, &running); | ||
1798 | 1817 | ||
1799 | values[n++] = 1 + leader->nr_siblings; | 1818 | values[n++] = 1 + leader->nr_siblings; |
1800 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1819 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1801 | values[n++] = leader->total_time_enabled + | 1820 | values[n++] = enabled; |
1802 | atomic64_read(&leader->child_total_time_enabled); | 1821 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1803 | } | 1822 | values[n++] = running; |
1804 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 1823 | values[n++] = count; |
1805 | values[n++] = leader->total_time_running + | 1824 | if (read_format & PERF_FORMAT_ID) |
1806 | atomic64_read(&leader->child_total_time_running); | 1825 | values[n++] = primary_event_id(leader); |
1807 | } | ||
1808 | 1826 | ||
1809 | size = n * sizeof(u64); | 1827 | size = n * sizeof(u64); |
1810 | 1828 | ||
1811 | if (copy_to_user(buf, values, size)) | 1829 | if (copy_to_user(buf, values, size)) |
1812 | return -EFAULT; | 1830 | goto unlock; |
1813 | |||
1814 | err = perf_event_read_entry(leader, read_format, buf + size); | ||
1815 | if (err < 0) | ||
1816 | return err; | ||
1817 | 1831 | ||
1818 | size += err; | 1832 | ret = size; |
1819 | 1833 | ||
1820 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 1834 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
1821 | err = perf_event_read_entry(sub, read_format, | 1835 | n = 0; |
1822 | buf + size); | 1836 | |
1823 | if (err < 0) | 1837 | values[n++] = perf_event_read_value(sub, &enabled, &running); |
1824 | return err; | 1838 | if (read_format & PERF_FORMAT_ID) |
1839 | values[n++] = primary_event_id(sub); | ||
1840 | |||
1841 | size = n * sizeof(u64); | ||
1842 | |||
1843 | if (copy_to_user(buf + ret, values, size)) { | ||
1844 | ret = -EFAULT; | ||
1845 | goto unlock; | ||
1846 | } | ||
1825 | 1847 | ||
1826 | size += err; | 1848 | ret += size; |
1827 | } | 1849 | } |
1850 | unlock: | ||
1851 | mutex_unlock(&ctx->mutex); | ||
1828 | 1852 | ||
1829 | return size; | 1853 | return ret; |
1830 | } | 1854 | } |
1831 | 1855 | ||
1832 | static int perf_event_read_one(struct perf_event *event, | 1856 | static int perf_event_read_one(struct perf_event *event, |
1833 | u64 read_format, char __user *buf) | 1857 | u64 read_format, char __user *buf) |
1834 | { | 1858 | { |
1859 | u64 enabled, running; | ||
1835 | u64 values[4]; | 1860 | u64 values[4]; |
1836 | int n = 0; | 1861 | int n = 0; |
1837 | 1862 | ||
1838 | values[n++] = perf_event_read_value(event); | 1863 | values[n++] = perf_event_read_value(event, &enabled, &running); |
1839 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1864 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1840 | values[n++] = event->total_time_enabled + | 1865 | values[n++] = enabled; |
1841 | atomic64_read(&event->child_total_time_enabled); | 1866 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1842 | } | 1867 | values[n++] = running; |
1843 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | ||
1844 | values[n++] = event->total_time_running + | ||
1845 | atomic64_read(&event->child_total_time_running); | ||
1846 | } | ||
1847 | if (read_format & PERF_FORMAT_ID) | 1868 | if (read_format & PERF_FORMAT_ID) |
1848 | values[n++] = primary_event_id(event); | 1869 | values[n++] = primary_event_id(event); |
1849 | 1870 | ||
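For reference, the byte stream that perf_event_read_group() now emits for a read() on a group leader opened with PERF_FORMAT_GROUP plus the time and ID flags follows the values[] writes above. A user-space view of that layout (struct and field names are illustrative):

	struct group_read {
		__u64 nr;		/* 1 + number of siblings               */
		__u64 time_enabled;	/* if PERF_FORMAT_TOTAL_TIME_ENABLED    */
		__u64 time_running;	/* if PERF_FORMAT_TOTAL_TIME_RUNNING    */
		struct {
			__u64 value;
			__u64 id;	/* if PERF_FORMAT_ID                    */
		} cnt[];		/* leader first, then each sibling      */
	};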
@@ -1874,12 +1895,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
1874 | return -ENOSPC; | 1895 | return -ENOSPC; |
1875 | 1896 | ||
1876 | WARN_ON_ONCE(event->ctx->parent_ctx); | 1897 | WARN_ON_ONCE(event->ctx->parent_ctx); |
1877 | mutex_lock(&event->child_mutex); | ||
1878 | if (read_format & PERF_FORMAT_GROUP) | 1898 | if (read_format & PERF_FORMAT_GROUP) |
1879 | ret = perf_event_read_group(event, read_format, buf); | 1899 | ret = perf_event_read_group(event, read_format, buf); |
1880 | else | 1900 | else |
1881 | ret = perf_event_read_one(event, read_format, buf); | 1901 | ret = perf_event_read_one(event, read_format, buf); |
1882 | mutex_unlock(&event->child_mutex); | ||
1883 | 1902 | ||
1884 | return ret; | 1903 | return ret; |
1885 | } | 1904 | } |
@@ -1987,7 +2006,8 @@ unlock: | |||
1987 | return ret; | 2006 | return ret; |
1988 | } | 2007 | } |
1989 | 2008 | ||
1990 | int perf_event_set_output(struct perf_event *event, int output_fd); | 2009 | static int perf_event_set_output(struct perf_event *event, int output_fd); |
2010 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | ||
1991 | 2011 | ||
1992 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 2012 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
1993 | { | 2013 | { |
@@ -2015,6 +2035,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
2015 | case PERF_EVENT_IOC_SET_OUTPUT: | 2035 | case PERF_EVENT_IOC_SET_OUTPUT: |
2016 | return perf_event_set_output(event, arg); | 2036 | return perf_event_set_output(event, arg); |
2017 | 2037 | ||
2038 | case PERF_EVENT_IOC_SET_FILTER: | ||
2039 | return perf_event_set_filter(event, (void __user *)arg); | ||
2040 | |||
2018 | default: | 2041 | default: |
2019 | return -ENOTTY; | 2042 | return -ENOTTY; |
2020 | } | 2043 | } |
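PERF_EVENT_IOC_SET_FILTER takes a pointer to an ftrace-style filter string and only applies to tracepoint events. A user-space sketch; the file descriptor and the filter expression are hypothetical:

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* fd: a PERF_TYPE_TRACEPOINT event returned by perf_event_open() */
	static int set_filter(int fd)
	{
		return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "common_pid != 0");
	}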
@@ -2105,49 +2128,31 @@ unlock: | |||
2105 | rcu_read_unlock(); | 2128 | rcu_read_unlock(); |
2106 | } | 2129 | } |
2107 | 2130 | ||
2108 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 2131 | static unsigned long perf_data_size(struct perf_mmap_data *data) |
2109 | { | 2132 | { |
2110 | struct perf_event *event = vma->vm_file->private_data; | 2133 | return data->nr_pages << (PAGE_SHIFT + data->data_order); |
2111 | struct perf_mmap_data *data; | 2134 | } |
2112 | int ret = VM_FAULT_SIGBUS; | ||
2113 | |||
2114 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
2115 | if (vmf->pgoff == 0) | ||
2116 | ret = 0; | ||
2117 | return ret; | ||
2118 | } | ||
2119 | |||
2120 | rcu_read_lock(); | ||
2121 | data = rcu_dereference(event->data); | ||
2122 | if (!data) | ||
2123 | goto unlock; | ||
2124 | |||
2125 | if (vmf->pgoff == 0) { | ||
2126 | vmf->page = virt_to_page(data->user_page); | ||
2127 | } else { | ||
2128 | int nr = vmf->pgoff - 1; | ||
2129 | |||
2130 | if ((unsigned)nr > data->nr_pages) | ||
2131 | goto unlock; | ||
2132 | 2135 | ||
2133 | if (vmf->flags & FAULT_FLAG_WRITE) | 2136 | #ifndef CONFIG_PERF_USE_VMALLOC |
2134 | goto unlock; | ||
2135 | 2137 | ||
2136 | vmf->page = virt_to_page(data->data_pages[nr]); | 2138 | /* |
2137 | } | 2139 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. |
2140 | */ | ||
2138 | 2141 | ||
2139 | get_page(vmf->page); | 2142 | static struct page * |
2140 | vmf->page->mapping = vma->vm_file->f_mapping; | 2143 | perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) |
2141 | vmf->page->index = vmf->pgoff; | 2144 | { |
2145 | if (pgoff > data->nr_pages) | ||
2146 | return NULL; | ||
2142 | 2147 | ||
2143 | ret = 0; | 2148 | if (pgoff == 0) |
2144 | unlock: | 2149 | return virt_to_page(data->user_page); |
2145 | rcu_read_unlock(); | ||
2146 | 2150 | ||
2147 | return ret; | 2151 | return virt_to_page(data->data_pages[pgoff - 1]); |
2148 | } | 2152 | } |
2149 | 2153 | ||
2150 | static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | 2154 | static struct perf_mmap_data * |
2155 | perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | ||
2151 | { | 2156 | { |
2152 | struct perf_mmap_data *data; | 2157 | struct perf_mmap_data *data; |
2153 | unsigned long size; | 2158 | unsigned long size; |
@@ -2172,19 +2177,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | |||
2172 | goto fail_data_pages; | 2177 | goto fail_data_pages; |
2173 | } | 2178 | } |
2174 | 2179 | ||
2180 | data->data_order = 0; | ||
2175 | data->nr_pages = nr_pages; | 2181 | data->nr_pages = nr_pages; |
2176 | atomic_set(&data->lock, -1); | ||
2177 | 2182 | ||
2178 | if (event->attr.watermark) { | 2183 | return data; |
2179 | data->watermark = min_t(long, PAGE_SIZE * nr_pages, | ||
2180 | event->attr.wakeup_watermark); | ||
2181 | } | ||
2182 | if (!data->watermark) | ||
2183 | data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); | ||
2184 | |||
2185 | rcu_assign_pointer(event->data, data); | ||
2186 | |||
2187 | return 0; | ||
2188 | 2184 | ||
2189 | fail_data_pages: | 2185 | fail_data_pages: |
2190 | for (i--; i >= 0; i--) | 2186 | for (i--; i >= 0; i--) |
@@ -2196,7 +2192,7 @@ fail_user_page: | |||
2196 | kfree(data); | 2192 | kfree(data); |
2197 | 2193 | ||
2198 | fail: | 2194 | fail: |
2199 | return -ENOMEM; | 2195 | return NULL; |
2200 | } | 2196 | } |
2201 | 2197 | ||
2202 | static void perf_mmap_free_page(unsigned long addr) | 2198 | static void perf_mmap_free_page(unsigned long addr) |
@@ -2207,28 +2203,170 @@ static void perf_mmap_free_page(unsigned long addr) | |||
2207 | __free_page(page); | 2203 | __free_page(page); |
2208 | } | 2204 | } |
2209 | 2205 | ||
2210 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | 2206 | static void perf_mmap_data_free(struct perf_mmap_data *data) |
2211 | { | 2207 | { |
2212 | struct perf_mmap_data *data; | ||
2213 | int i; | 2208 | int i; |
2214 | 2209 | ||
2215 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
2216 | |||
2217 | perf_mmap_free_page((unsigned long)data->user_page); | 2210 | perf_mmap_free_page((unsigned long)data->user_page); |
2218 | for (i = 0; i < data->nr_pages; i++) | 2211 | for (i = 0; i < data->nr_pages; i++) |
2219 | perf_mmap_free_page((unsigned long)data->data_pages[i]); | 2212 | perf_mmap_free_page((unsigned long)data->data_pages[i]); |
2213 | kfree(data); | ||
2214 | } | ||
2215 | |||
2216 | #else | ||
2217 | |||
2218 | /* | ||
2219 | * Back perf_mmap() with vmalloc memory. | ||
2220 | * | ||
2221 | * Required for architectures that have d-cache aliasing issues. | ||
2222 | */ | ||
2223 | |||
2224 | static struct page * | ||
2225 | perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) | ||
2226 | { | ||
2227 | if (pgoff > (1UL << data->data_order)) | ||
2228 | return NULL; | ||
2229 | |||
2230 | return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); | ||
2231 | } | ||
2232 | |||
2233 | static void perf_mmap_unmark_page(void *addr) | ||
2234 | { | ||
2235 | struct page *page = vmalloc_to_page(addr); | ||
2236 | |||
2237 | page->mapping = NULL; | ||
2238 | } | ||
2239 | |||
2240 | static void perf_mmap_data_free_work(struct work_struct *work) | ||
2241 | { | ||
2242 | struct perf_mmap_data *data; | ||
2243 | void *base; | ||
2244 | int i, nr; | ||
2245 | |||
2246 | data = container_of(work, struct perf_mmap_data, work); | ||
2247 | nr = 1 << data->data_order; | ||
2248 | |||
2249 | base = data->user_page; | ||
2250 | for (i = 0; i < nr + 1; i++) | ||
2251 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
2252 | |||
2253 | vfree(base); | ||
2254 | kfree(data); | ||
2255 | } | ||
2256 | |||
2257 | static void perf_mmap_data_free(struct perf_mmap_data *data) | ||
2258 | { | ||
2259 | schedule_work(&data->work); | ||
2260 | } | ||
2261 | |||
2262 | static struct perf_mmap_data * | ||
2263 | perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | ||
2264 | { | ||
2265 | struct perf_mmap_data *data; | ||
2266 | unsigned long size; | ||
2267 | void *all_buf; | ||
2268 | |||
2269 | WARN_ON(atomic_read(&event->mmap_count)); | ||
2270 | |||
2271 | size = sizeof(struct perf_mmap_data); | ||
2272 | size += sizeof(void *); | ||
2273 | |||
2274 | data = kzalloc(size, GFP_KERNEL); | ||
2275 | if (!data) | ||
2276 | goto fail; | ||
2220 | 2277 | ||
2278 | INIT_WORK(&data->work, perf_mmap_data_free_work); | ||
2279 | |||
2280 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
2281 | if (!all_buf) | ||
2282 | goto fail_all_buf; | ||
2283 | |||
2284 | data->user_page = all_buf; | ||
2285 | data->data_pages[0] = all_buf + PAGE_SIZE; | ||
2286 | data->data_order = ilog2(nr_pages); | ||
2287 | data->nr_pages = 1; | ||
2288 | |||
2289 | return data; | ||
2290 | |||
2291 | fail_all_buf: | ||
2221 | kfree(data); | 2292 | kfree(data); |
2293 | |||
2294 | fail: | ||
2295 | return NULL; | ||
2296 | } | ||
2297 | |||
2298 | #endif | ||
2299 | |||
2300 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
2301 | { | ||
2302 | struct perf_event *event = vma->vm_file->private_data; | ||
2303 | struct perf_mmap_data *data; | ||
2304 | int ret = VM_FAULT_SIGBUS; | ||
2305 | |||
2306 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
2307 | if (vmf->pgoff == 0) | ||
2308 | ret = 0; | ||
2309 | return ret; | ||
2310 | } | ||
2311 | |||
2312 | rcu_read_lock(); | ||
2313 | data = rcu_dereference(event->data); | ||
2314 | if (!data) | ||
2315 | goto unlock; | ||
2316 | |||
2317 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) | ||
2318 | goto unlock; | ||
2319 | |||
2320 | vmf->page = perf_mmap_to_page(data, vmf->pgoff); | ||
2321 | if (!vmf->page) | ||
2322 | goto unlock; | ||
2323 | |||
2324 | get_page(vmf->page); | ||
2325 | vmf->page->mapping = vma->vm_file->f_mapping; | ||
2326 | vmf->page->index = vmf->pgoff; | ||
2327 | |||
2328 | ret = 0; | ||
2329 | unlock: | ||
2330 | rcu_read_unlock(); | ||
2331 | |||
2332 | return ret; | ||
2222 | } | 2333 | } |
2223 | 2334 | ||
2224 | static void perf_mmap_data_free(struct perf_event *event) | 2335 | static void |
2336 | perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) | ||
2337 | { | ||
2338 | long max_size = perf_data_size(data); | ||
2339 | |||
2340 | atomic_set(&data->lock, -1); | ||
2341 | |||
2342 | if (event->attr.watermark) { | ||
2343 | data->watermark = min_t(long, max_size, | ||
2344 | event->attr.wakeup_watermark); | ||
2345 | } | ||
2346 | |||
2347 | if (!data->watermark) | ||
2348 | data->watermark = max_size / 2; | ||
2349 | |||
2350 | |||
2351 | rcu_assign_pointer(event->data, data); | ||
2352 | } | ||
2353 | |||
2354 | static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) | ||
2355 | { | ||
2356 | struct perf_mmap_data *data; | ||
2357 | |||
2358 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
2359 | perf_mmap_data_free(data); | ||
2360 | } | ||
2361 | |||
2362 | static void perf_mmap_data_release(struct perf_event *event) | ||
2225 | { | 2363 | { |
2226 | struct perf_mmap_data *data = event->data; | 2364 | struct perf_mmap_data *data = event->data; |
2227 | 2365 | ||
2228 | WARN_ON(atomic_read(&event->mmap_count)); | 2366 | WARN_ON(atomic_read(&event->mmap_count)); |
2229 | 2367 | ||
2230 | rcu_assign_pointer(event->data, NULL); | 2368 | rcu_assign_pointer(event->data, NULL); |
2231 | call_rcu(&data->rcu_head, __perf_mmap_data_free); | 2369 | call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); |
2232 | } | 2370 | } |
2233 | 2371 | ||
2234 | static void perf_mmap_open(struct vm_area_struct *vma) | 2372 | static void perf_mmap_open(struct vm_area_struct *vma) |
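Both buffer backends report the same size through perf_data_size(); a worked example for a 16-page data buffer with 4 KiB pages:

	/*
	 * Without CONFIG_PERF_USE_VMALLOC:
	 *   data->data_order = 0,  data->nr_pages = 16
	 *   perf_data_size() = 16 << (PAGE_SHIFT + 0) = 64 KiB
	 *
	 * With CONFIG_PERF_USE_VMALLOC (one contiguous vmalloc area):
	 *   data->data_order = ilog2(16) = 4,  data->nr_pages = 1
	 *   perf_data_size() = 1 << (PAGE_SHIFT + 4) = 64 KiB
	 */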
@@ -2244,11 +2382,12 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
2244 | 2382 | ||
2245 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2383 | WARN_ON_ONCE(event->ctx->parent_ctx); |
2246 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 2384 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
2385 | unsigned long size = perf_data_size(event->data); | ||
2247 | struct user_struct *user = current_user(); | 2386 | struct user_struct *user = current_user(); |
2248 | 2387 | ||
2249 | atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm); | 2388 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
2250 | vma->vm_mm->locked_vm -= event->data->nr_locked; | 2389 | vma->vm_mm->locked_vm -= event->data->nr_locked; |
2251 | perf_mmap_data_free(event); | 2390 | perf_mmap_data_release(event); |
2252 | mutex_unlock(&event->mmap_mutex); | 2391 | mutex_unlock(&event->mmap_mutex); |
2253 | } | 2392 | } |
2254 | } | 2393 | } |
@@ -2266,6 +2405,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
2266 | unsigned long user_locked, user_lock_limit; | 2405 | unsigned long user_locked, user_lock_limit; |
2267 | struct user_struct *user = current_user(); | 2406 | struct user_struct *user = current_user(); |
2268 | unsigned long locked, lock_limit; | 2407 | unsigned long locked, lock_limit; |
2408 | struct perf_mmap_data *data; | ||
2269 | unsigned long vma_size; | 2409 | unsigned long vma_size; |
2270 | unsigned long nr_pages; | 2410 | unsigned long nr_pages; |
2271 | long user_extra, extra; | 2411 | long user_extra, extra; |
@@ -2328,10 +2468,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
2328 | } | 2468 | } |
2329 | 2469 | ||
2330 | WARN_ON(event->data); | 2470 | WARN_ON(event->data); |
2331 | ret = perf_mmap_data_alloc(event, nr_pages); | 2471 | |
2332 | if (ret) | 2472 | data = perf_mmap_data_alloc(event, nr_pages); |
2473 | ret = -ENOMEM; | ||
2474 | if (!data) | ||
2333 | goto unlock; | 2475 | goto unlock; |
2334 | 2476 | ||
2477 | ret = 0; | ||
2478 | perf_mmap_data_init(event, data); | ||
2479 | |||
2335 | atomic_set(&event->mmap_count, 1); | 2480 | atomic_set(&event->mmap_count, 1); |
2336 | atomic_long_add(user_extra, &user->locked_vm); | 2481 | atomic_long_add(user_extra, &user->locked_vm); |
2337 | vma->vm_mm->locked_vm += extra; | 2482 | vma->vm_mm->locked_vm += extra; |
@@ -2519,7 +2664,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, | |||
2519 | if (!data->writable) | 2664 | if (!data->writable) |
2520 | return true; | 2665 | return true; |
2521 | 2666 | ||
2522 | mask = (data->nr_pages << PAGE_SHIFT) - 1; | 2667 | mask = perf_data_size(data) - 1; |
2523 | 2668 | ||
2524 | offset = (offset - tail) & mask; | 2669 | offset = (offset - tail) & mask; |
2525 | head = (head - tail) & mask; | 2670 | head = (head - tail) & mask; |
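Because perf_data_size() is always a power of two, the free-running positions can be reduced with a simple mask before the overwrite check. A small illustration with the 64 KiB buffer from the example above (positions are hypothetical):

	/*
	 * perf_data_size(data) = 0x10000  =>  mask = 0xffff
	 *
	 *   tail = 0x1f000, offset = 0x20c00
	 *   (offset - tail) & mask = 0x1c00
	 *
	 * i.e. the writer is 0x1c00 bytes ahead of the reader inside the
	 * buffer, regardless of how many times the counters have wrapped.
	 */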
@@ -2558,20 +2703,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) | |||
2558 | static void perf_output_lock(struct perf_output_handle *handle) | 2703 | static void perf_output_lock(struct perf_output_handle *handle) |
2559 | { | 2704 | { |
2560 | struct perf_mmap_data *data = handle->data; | 2705 | struct perf_mmap_data *data = handle->data; |
2561 | int cpu; | 2706 | int cur, cpu = get_cpu(); |
2562 | 2707 | ||
2563 | handle->locked = 0; | 2708 | handle->locked = 0; |
2564 | 2709 | ||
2565 | local_irq_save(handle->flags); | 2710 | for (;;) { |
2566 | cpu = smp_processor_id(); | 2711 | cur = atomic_cmpxchg(&data->lock, -1, cpu); |
2567 | 2712 | if (cur == -1) { | |
2568 | if (in_nmi() && atomic_read(&data->lock) == cpu) | 2713 | handle->locked = 1; |
2569 | return; | 2714 | break; |
2715 | } | ||
2716 | if (cur == cpu) | ||
2717 | break; | ||
2570 | 2718 | ||
2571 | while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) | ||
2572 | cpu_relax(); | 2719 | cpu_relax(); |
2573 | 2720 | } | |
2574 | handle->locked = 1; | ||
2575 | } | 2721 | } |
2576 | 2722 | ||
2577 | static void perf_output_unlock(struct perf_output_handle *handle) | 2723 | static void perf_output_unlock(struct perf_output_handle *handle) |
@@ -2617,14 +2763,14 @@ again: | |||
2617 | if (atomic_xchg(&data->wakeup, 0)) | 2763 | if (atomic_xchg(&data->wakeup, 0)) |
2618 | perf_output_wakeup(handle); | 2764 | perf_output_wakeup(handle); |
2619 | out: | 2765 | out: |
2620 | local_irq_restore(handle->flags); | 2766 | put_cpu(); |
2621 | } | 2767 | } |
2622 | 2768 | ||
2623 | void perf_output_copy(struct perf_output_handle *handle, | 2769 | void perf_output_copy(struct perf_output_handle *handle, |
2624 | const void *buf, unsigned int len) | 2770 | const void *buf, unsigned int len) |
2625 | { | 2771 | { |
2626 | unsigned int pages_mask; | 2772 | unsigned int pages_mask; |
2627 | unsigned int offset; | 2773 | unsigned long offset; |
2628 | unsigned int size; | 2774 | unsigned int size; |
2629 | void **pages; | 2775 | void **pages; |
2630 | 2776 | ||
@@ -2633,12 +2779,14 @@ void perf_output_copy(struct perf_output_handle *handle, | |||
2633 | pages = handle->data->data_pages; | 2779 | pages = handle->data->data_pages; |
2634 | 2780 | ||
2635 | do { | 2781 | do { |
2636 | unsigned int page_offset; | 2782 | unsigned long page_offset; |
2783 | unsigned long page_size; | ||
2637 | int nr; | 2784 | int nr; |
2638 | 2785 | ||
2639 | nr = (offset >> PAGE_SHIFT) & pages_mask; | 2786 | nr = (offset >> PAGE_SHIFT) & pages_mask; |
2640 | page_offset = offset & (PAGE_SIZE - 1); | 2787 | page_size = 1UL << (handle->data->data_order + PAGE_SHIFT); |
2641 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | 2788 | page_offset = offset & (page_size - 1); |
2789 | size = min_t(unsigned int, page_size - page_offset, len); | ||
2642 | 2790 | ||
2643 | memcpy(pages[nr] + page_offset, buf, size); | 2791 | memcpy(pages[nr] + page_offset, buf, size); |
2644 | 2792 | ||
@@ -3126,15 +3274,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3126 | { | 3274 | { |
3127 | struct perf_event *event; | 3275 | struct perf_event *event; |
3128 | 3276 | ||
3129 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3130 | return; | ||
3131 | |||
3132 | rcu_read_lock(); | ||
3133 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3277 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3134 | if (perf_event_task_match(event)) | 3278 | if (perf_event_task_match(event)) |
3135 | perf_event_task_output(event, task_event); | 3279 | perf_event_task_output(event, task_event); |
3136 | } | 3280 | } |
3137 | rcu_read_unlock(); | ||
3138 | } | 3281 | } |
3139 | 3282 | ||
3140 | static void perf_event_task_event(struct perf_task_event *task_event) | 3283 | static void perf_event_task_event(struct perf_task_event *task_event) |
@@ -3142,11 +3285,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3142 | struct perf_cpu_context *cpuctx; | 3285 | struct perf_cpu_context *cpuctx; |
3143 | struct perf_event_context *ctx = task_event->task_ctx; | 3286 | struct perf_event_context *ctx = task_event->task_ctx; |
3144 | 3287 | ||
3288 | rcu_read_lock(); | ||
3145 | cpuctx = &get_cpu_var(perf_cpu_context); | 3289 | cpuctx = &get_cpu_var(perf_cpu_context); |
3146 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3290 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3147 | put_cpu_var(perf_cpu_context); | 3291 | put_cpu_var(perf_cpu_context); |
3148 | 3292 | ||
3149 | rcu_read_lock(); | ||
3150 | if (!ctx) | 3293 | if (!ctx) |
3151 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); | 3294 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); |
3152 | if (ctx) | 3295 | if (ctx) |
@@ -3238,15 +3381,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, | |||
3238 | { | 3381 | { |
3239 | struct perf_event *event; | 3382 | struct perf_event *event; |
3240 | 3383 | ||
3241 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3242 | return; | ||
3243 | |||
3244 | rcu_read_lock(); | ||
3245 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3384 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3246 | if (perf_event_comm_match(event)) | 3385 | if (perf_event_comm_match(event)) |
3247 | perf_event_comm_output(event, comm_event); | 3386 | perf_event_comm_output(event, comm_event); |
3248 | } | 3387 | } |
3249 | rcu_read_unlock(); | ||
3250 | } | 3388 | } |
3251 | 3389 | ||
3252 | static void perf_event_comm_event(struct perf_comm_event *comm_event) | 3390 | static void perf_event_comm_event(struct perf_comm_event *comm_event) |
@@ -3257,7 +3395,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3257 | char comm[TASK_COMM_LEN]; | 3395 | char comm[TASK_COMM_LEN]; |
3258 | 3396 | ||
3259 | memset(comm, 0, sizeof(comm)); | 3397 | memset(comm, 0, sizeof(comm)); |
3260 | strncpy(comm, comm_event->task->comm, sizeof(comm)); | 3398 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
3261 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | 3399 | size = ALIGN(strlen(comm)+1, sizeof(u64)); |
3262 | 3400 | ||
3263 | comm_event->comm = comm; | 3401 | comm_event->comm = comm; |
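Background on the strncpy() to strlcpy() switch: strlcpy() always NUL-terminates, strncpy() does not when the source fills the buffer. A generic illustration, not taken from this file:

	char dst[4];

	strncpy(dst, "abcdef", sizeof(dst));	/* dst = "abcd", not terminated */
	strlcpy(dst, "abcdef", sizeof(dst));	/* dst = "abc\0", terminated    */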
@@ -3265,11 +3403,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3265 | 3403 | ||
3266 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3404 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3267 | 3405 | ||
3406 | rcu_read_lock(); | ||
3268 | cpuctx = &get_cpu_var(perf_cpu_context); | 3407 | cpuctx = &get_cpu_var(perf_cpu_context); |
3269 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3408 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3270 | put_cpu_var(perf_cpu_context); | 3409 | put_cpu_var(perf_cpu_context); |
3271 | 3410 | ||
3272 | rcu_read_lock(); | ||
3273 | /* | 3411 | /* |
3274 | * doesn't really matter which of the child contexts the | 3412 | * doesn't really matter which of the child contexts the |
3275 | * events ends up in. | 3413 | * events ends up in. |
@@ -3362,15 +3500,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, | |||
3362 | { | 3500 | { |
3363 | struct perf_event *event; | 3501 | struct perf_event *event; |
3364 | 3502 | ||
3365 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3366 | return; | ||
3367 | |||
3368 | rcu_read_lock(); | ||
3369 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3503 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3370 | if (perf_event_mmap_match(event, mmap_event)) | 3504 | if (perf_event_mmap_match(event, mmap_event)) |
3371 | perf_event_mmap_output(event, mmap_event); | 3505 | perf_event_mmap_output(event, mmap_event); |
3372 | } | 3506 | } |
3373 | rcu_read_unlock(); | ||
3374 | } | 3507 | } |
3375 | 3508 | ||
3376 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | 3509 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) |
@@ -3426,11 +3559,11 @@ got_name: | |||
3426 | 3559 | ||
3427 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 3560 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
3428 | 3561 | ||
3562 | rcu_read_lock(); | ||
3429 | cpuctx = &get_cpu_var(perf_cpu_context); | 3563 | cpuctx = &get_cpu_var(perf_cpu_context); |
3430 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); | 3564 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); |
3431 | put_cpu_var(perf_cpu_context); | 3565 | put_cpu_var(perf_cpu_context); |
3432 | 3566 | ||
3433 | rcu_read_lock(); | ||
3434 | /* | 3567 | /* |
3435 | * doesn't really matter which of the child contexts the | 3568 | * doesn't really matter which of the child contexts the |
3436 | * events ends up in. | 3569 | * events ends up in. |
@@ -3569,7 +3702,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3569 | perf_event_disable(event); | 3702 | perf_event_disable(event); |
3570 | } | 3703 | } |
3571 | 3704 | ||
3572 | perf_event_output(event, nmi, data, regs); | 3705 | if (event->overflow_handler) |
3706 | event->overflow_handler(event, nmi, data, regs); | ||
3707 | else | ||
3708 | perf_event_output(event, nmi, data, regs); | ||
3709 | |||
3573 | return ret; | 3710 | return ret; |
3574 | } | 3711 | } |
3575 | 3712 | ||
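The new overflow_handler hook lets an in-kernel consumer replace the default output path. A sketch of a handler matching the call above; the function and the counter are hypothetical, only the argument order is taken from the call site:

	static atomic_long_t my_overflow_count;		/* hypothetical */

	static void my_overflow_handler(struct perf_event *event, int nmi,
					struct perf_sample_data *data,
					struct pt_regs *regs)
	{
		/* e.g. count overflows instead of writing samples */
		atomic_long_inc(&my_overflow_count);
	}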
@@ -3614,16 +3751,16 @@ again: | |||
3614 | return nr; | 3751 | return nr; |
3615 | } | 3752 | } |
3616 | 3753 | ||
3617 | static void perf_swevent_overflow(struct perf_event *event, | 3754 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, |
3618 | int nmi, struct perf_sample_data *data, | 3755 | int nmi, struct perf_sample_data *data, |
3619 | struct pt_regs *regs) | 3756 | struct pt_regs *regs) |
3620 | { | 3757 | { |
3621 | struct hw_perf_event *hwc = &event->hw; | 3758 | struct hw_perf_event *hwc = &event->hw; |
3622 | int throttle = 0; | 3759 | int throttle = 0; |
3623 | u64 overflow; | ||
3624 | 3760 | ||
3625 | data->period = event->hw.last_period; | 3761 | data->period = event->hw.last_period; |
3626 | overflow = perf_swevent_set_period(event); | 3762 | if (!overflow) |
3763 | overflow = perf_swevent_set_period(event); | ||
3627 | 3764 | ||
3628 | if (hwc->interrupts == MAX_INTERRUPTS) | 3765 | if (hwc->interrupts == MAX_INTERRUPTS) |
3629 | return; | 3766 | return; |
@@ -3656,14 +3793,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
3656 | 3793 | ||
3657 | atomic64_add(nr, &event->count); | 3794 | atomic64_add(nr, &event->count); |
3658 | 3795 | ||
3796 | if (!regs) | ||
3797 | return; | ||
3798 | |||
3659 | if (!hwc->sample_period) | 3799 | if (!hwc->sample_period) |
3660 | return; | 3800 | return; |
3661 | 3801 | ||
3662 | if (!regs) | 3802 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
3803 | return perf_swevent_overflow(event, 1, nmi, data, regs); | ||
3804 | |||
3805 | if (atomic64_add_negative(nr, &hwc->period_left)) | ||
3663 | return; | 3806 | return; |
3664 | 3807 | ||
3665 | if (!atomic64_add_negative(nr, &hwc->period_left)) | 3808 | perf_swevent_overflow(event, 0, nmi, data, regs); |
3666 | perf_swevent_overflow(event, nmi, data, regs); | ||
3667 | } | 3809 | } |
3668 | 3810 | ||
3669 | static int perf_swevent_is_counting(struct perf_event *event) | 3811 | static int perf_swevent_is_counting(struct perf_event *event) |
@@ -3696,25 +3838,44 @@ static int perf_swevent_is_counting(struct perf_event *event) | |||
3696 | return 1; | 3838 | return 1; |
3697 | } | 3839 | } |
3698 | 3840 | ||
3841 | static int perf_tp_event_match(struct perf_event *event, | ||
3842 | struct perf_sample_data *data); | ||
3843 | |||
3844 | static int perf_exclude_event(struct perf_event *event, | ||
3845 | struct pt_regs *regs) | ||
3846 | { | ||
3847 | if (regs) { | ||
3848 | if (event->attr.exclude_user && user_mode(regs)) | ||
3849 | return 1; | ||
3850 | |||
3851 | if (event->attr.exclude_kernel && !user_mode(regs)) | ||
3852 | return 1; | ||
3853 | } | ||
3854 | |||
3855 | return 0; | ||
3856 | } | ||
3857 | |||
3699 | static int perf_swevent_match(struct perf_event *event, | 3858 | static int perf_swevent_match(struct perf_event *event, |
3700 | enum perf_type_id type, | 3859 | enum perf_type_id type, |
3701 | u32 event_id, struct pt_regs *regs) | 3860 | u32 event_id, |
3861 | struct perf_sample_data *data, | ||
3862 | struct pt_regs *regs) | ||
3702 | { | 3863 | { |
3703 | if (!perf_swevent_is_counting(event)) | 3864 | if (!perf_swevent_is_counting(event)) |
3704 | return 0; | 3865 | return 0; |
3705 | 3866 | ||
3706 | if (event->attr.type != type) | 3867 | if (event->attr.type != type) |
3707 | return 0; | 3868 | return 0; |
3869 | |||
3708 | if (event->attr.config != event_id) | 3870 | if (event->attr.config != event_id) |
3709 | return 0; | 3871 | return 0; |
3710 | 3872 | ||
3711 | if (regs) { | 3873 | if (perf_exclude_event(event, regs)) |
3712 | if (event->attr.exclude_user && user_mode(regs)) | 3874 | return 0; |
3713 | return 0; | ||
3714 | 3875 | ||
3715 | if (event->attr.exclude_kernel && !user_mode(regs)) | 3876 | if (event->attr.type == PERF_TYPE_TRACEPOINT && |
3716 | return 0; | 3877 | !perf_tp_event_match(event, data)) |
3717 | } | 3878 | return 0; |
3718 | 3879 | ||
3719 | return 1; | 3880 | return 1; |
3720 | } | 3881 | } |
@@ -3727,49 +3888,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, | |||
3727 | { | 3888 | { |
3728 | struct perf_event *event; | 3889 | struct perf_event *event; |
3729 | 3890 | ||
3730 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3731 | return; | ||
3732 | |||
3733 | rcu_read_lock(); | ||
3734 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3891 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3735 | if (perf_swevent_match(event, type, event_id, regs)) | 3892 | if (perf_swevent_match(event, type, event_id, data, regs)) |
3736 | perf_swevent_add(event, nr, nmi, data, regs); | 3893 | perf_swevent_add(event, nr, nmi, data, regs); |
3737 | } | 3894 | } |
3738 | rcu_read_unlock(); | ||
3739 | } | 3895 | } |
3740 | 3896 | ||
3741 | static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) | 3897 | int perf_swevent_get_recursion_context(void) |
3742 | { | 3898 | { |
3899 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | ||
3900 | int rctx; | ||
3901 | |||
3743 | if (in_nmi()) | 3902 | if (in_nmi()) |
3744 | return &cpuctx->recursion[3]; | 3903 | rctx = 3; |
3904 | else if (in_irq()) | ||
3905 | rctx = 2; | ||
3906 | else if (in_softirq()) | ||
3907 | rctx = 1; | ||
3908 | else | ||
3909 | rctx = 0; | ||
3745 | 3910 | ||
3746 | if (in_irq()) | 3911 | if (cpuctx->recursion[rctx]) { |
3747 | return &cpuctx->recursion[2]; | 3912 | put_cpu_var(perf_cpu_context); |
3913 | return -1; | ||
3914 | } | ||
3748 | 3915 | ||
3749 | if (in_softirq()) | 3916 | cpuctx->recursion[rctx]++; |
3750 | return &cpuctx->recursion[1]; | 3917 | barrier(); |
3751 | 3918 | ||
3752 | return &cpuctx->recursion[0]; | 3919 | return rctx; |
3920 | } | ||
3921 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | ||
3922 | |||
3923 | void perf_swevent_put_recursion_context(int rctx) | ||
3924 | { | ||
3925 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
3926 | barrier(); | ||
3927 | cpuctx->recursion[rctx]--; | ||
3928 | put_cpu_var(perf_cpu_context); | ||
3753 | } | 3929 | } |
3930 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); | ||
3754 | 3931 | ||
3755 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 3932 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
3756 | u64 nr, int nmi, | 3933 | u64 nr, int nmi, |
3757 | struct perf_sample_data *data, | 3934 | struct perf_sample_data *data, |
3758 | struct pt_regs *regs) | 3935 | struct pt_regs *regs) |
3759 | { | 3936 | { |
3760 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 3937 | struct perf_cpu_context *cpuctx; |
3761 | int *recursion = perf_swevent_recursion_context(cpuctx); | ||
3762 | struct perf_event_context *ctx; | 3938 | struct perf_event_context *ctx; |
3763 | 3939 | ||
3764 | if (*recursion) | 3940 | cpuctx = &__get_cpu_var(perf_cpu_context); |
3765 | goto out; | 3941 | rcu_read_lock(); |
3766 | |||
3767 | (*recursion)++; | ||
3768 | barrier(); | ||
3769 | |||
3770 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, | 3942 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, |
3771 | nr, nmi, data, regs); | 3943 | nr, nmi, data, regs); |
3772 | rcu_read_lock(); | ||
3773 | /* | 3944 | /* |
3774 | * doesn't really matter which of the child contexts the | 3945 | * doesn't really matter which of the child contexts the |
3775 | * events ends up in. | 3946 | * events ends up in. |
@@ -3778,23 +3949,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
3778 | if (ctx) | 3949 | if (ctx) |
3779 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); | 3950 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); |
3780 | rcu_read_unlock(); | 3951 | rcu_read_unlock(); |
3781 | |||
3782 | barrier(); | ||
3783 | (*recursion)--; | ||
3784 | |||
3785 | out: | ||
3786 | put_cpu_var(perf_cpu_context); | ||
3787 | } | 3952 | } |
3788 | 3953 | ||
3789 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 3954 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
3790 | struct pt_regs *regs, u64 addr) | 3955 | struct pt_regs *regs, u64 addr) |
3791 | { | 3956 | { |
3792 | struct perf_sample_data data = { | 3957 | struct perf_sample_data data; |
3793 | .addr = addr, | 3958 | int rctx; |
3794 | }; | ||
3795 | 3959 | ||
3796 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, | 3960 | rctx = perf_swevent_get_recursion_context(); |
3797 | &data, regs); | 3961 | if (rctx < 0) |
3962 | return; | ||
3963 | |||
3964 | data.addr = addr; | ||
3965 | data.raw = NULL; | ||
3966 | |||
3967 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | ||
3968 | |||
3969 | perf_swevent_put_recursion_context(rctx); | ||
3798 | } | 3970 | } |
3799 | 3971 | ||
3800 | static void perf_swevent_read(struct perf_event *event) | 3972 | static void perf_swevent_read(struct perf_event *event) |
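__perf_sw_event() above is the canonical user of the new recursion-context API; other software-event paths are expected to follow the same bracket pattern. A condensed sketch with a hypothetical caller:

	static void my_sw_event_hook(void)
	{
		int rctx;

		rctx = perf_swevent_get_recursion_context();
		if (rctx < 0)
			return;		/* already nested at this level */

		/* ... account or emit the software event here ... */

		perf_swevent_put_recursion_context(rctx);
	}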
@@ -3839,6 +4011,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3839 | event->pmu->read(event); | 4011 | event->pmu->read(event); |
3840 | 4012 | ||
3841 | data.addr = 0; | 4013 | data.addr = 0; |
4014 | data.period = event->hw.last_period; | ||
3842 | regs = get_irq_regs(); | 4015 | regs = get_irq_regs(); |
3843 | /* | 4016 | /* |
3844 | * In case we exclude kernel IPs or are somehow not in interrupt | 4017 | * In case we exclude kernel IPs or are somehow not in interrupt |
@@ -3849,8 +4022,9 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3849 | regs = task_pt_regs(current); | 4022 | regs = task_pt_regs(current); |
3850 | 4023 | ||
3851 | if (regs) { | 4024 | if (regs) { |
3852 | if (perf_event_overflow(event, 0, &data, regs)) | 4025 | if (!(event->attr.exclude_idle && current->pid == 0)) |
3853 | ret = HRTIMER_NORESTART; | 4026 | if (perf_event_overflow(event, 0, &data, regs)) |
4027 | ret = HRTIMER_NORESTART; | ||
3854 | } | 4028 | } |
3855 | 4029 | ||
3856 | period = max_t(u64, 10000, event->hw.sample_period); | 4030 | period = max_t(u64, 10000, event->hw.sample_period); |
@@ -3859,6 +4033,42 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3859 | return ret; | 4033 | return ret; |
3860 | } | 4034 | } |
3861 | 4035 | ||
4036 | static void perf_swevent_start_hrtimer(struct perf_event *event) | ||
4037 | { | ||
4038 | struct hw_perf_event *hwc = &event->hw; | ||
4039 | |||
4040 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4041 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4042 | if (hwc->sample_period) { | ||
4043 | u64 period; | ||
4044 | |||
4045 | if (hwc->remaining) { | ||
4046 | if (hwc->remaining < 0) | ||
4047 | period = 10000; | ||
4048 | else | ||
4049 | period = hwc->remaining; | ||
4050 | hwc->remaining = 0; | ||
4051 | } else { | ||
4052 | period = max_t(u64, 10000, hwc->sample_period); | ||
4053 | } | ||
4054 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4055 | ns_to_ktime(period), 0, | ||
4056 | HRTIMER_MODE_REL, 0); | ||
4057 | } | ||
4058 | } | ||
4059 | |||
4060 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | ||
4061 | { | ||
4062 | struct hw_perf_event *hwc = &event->hw; | ||
4063 | |||
4064 | if (hwc->sample_period) { | ||
4065 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4066 | hwc->remaining = ktime_to_ns(remaining); | ||
4067 | |||
4068 | hrtimer_cancel(&hwc->hrtimer); | ||
4069 | } | ||
4070 | } | ||
4071 | |||
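perf_swevent_start_hrtimer() and perf_swevent_cancel_hrtimer() factor out the timer setup that the cpu-clock and task-clock enable paths below used to open-code, and additionally stash the unexpired part of the period in hwc->remaining, so a disabled-then-re-enabled event finishes the interrupted period instead of starting a fresh one. A stand-alone sketch of that pause/resume pattern, using only the generic hrtimer API and hypothetical names:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

struct resumable_timer {
	struct hrtimer	timer;
	u64		period_ns;	/* full sampling period */
	s64		remaining_ns;	/* unexpired part saved at pause time */
};

/* Remember how much of the current period is left, then stop the timer. */
static void resumable_timer_pause(struct resumable_timer *rt)
{
	rt->remaining_ns = ktime_to_ns(hrtimer_get_remaining(&rt->timer));
	hrtimer_cancel(&rt->timer);
}

/* Restart, finishing the interrupted period first if one was saved. */
static void resumable_timer_resume(struct resumable_timer *rt)
{
	u64 ns = rt->period_ns;

	if (rt->remaining_ns > 0)
		ns = rt->remaining_ns;	/* already-expired remainders fall back to a full period */
	rt->remaining_ns = 0;

	hrtimer_start(&rt->timer, ns_to_ktime(ns), HRTIMER_MODE_REL);
}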
3862 | /* | 4072 | /* |
3863 | * Software event: cpu wall time clock | 4073 | * Software event: cpu wall time clock |
3864 | */ | 4074 | */ |
@@ -3881,22 +4091,14 @@ static int cpu_clock_perf_event_enable(struct perf_event *event) | |||
3881 | int cpu = raw_smp_processor_id(); | 4091 | int cpu = raw_smp_processor_id(); |
3882 | 4092 | ||
3883 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); | 4093 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); |
3884 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4094 | perf_swevent_start_hrtimer(event); |
3885 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
3886 | if (hwc->sample_period) { | ||
3887 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3888 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3889 | ns_to_ktime(period), 0, | ||
3890 | HRTIMER_MODE_REL, 0); | ||
3891 | } | ||
3892 | 4095 | ||
3893 | return 0; | 4096 | return 0; |
3894 | } | 4097 | } |
3895 | 4098 | ||
3896 | static void cpu_clock_perf_event_disable(struct perf_event *event) | 4099 | static void cpu_clock_perf_event_disable(struct perf_event *event) |
3897 | { | 4100 | { |
3898 | if (event->hw.sample_period) | 4101 | perf_swevent_cancel_hrtimer(event); |
3899 | hrtimer_cancel(&event->hw.hrtimer); | ||
3900 | cpu_clock_perf_event_update(event); | 4102 | cpu_clock_perf_event_update(event); |
3901 | } | 4103 | } |
3902 | 4104 | ||
@@ -3933,22 +4135,15 @@ static int task_clock_perf_event_enable(struct perf_event *event) | |||
3933 | now = event->ctx->time; | 4135 | now = event->ctx->time; |
3934 | 4136 | ||
3935 | atomic64_set(&hwc->prev_count, now); | 4137 | atomic64_set(&hwc->prev_count, now); |
3936 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4138 | |
3937 | hwc->hrtimer.function = perf_swevent_hrtimer; | 4139 | perf_swevent_start_hrtimer(event); |
3938 | if (hwc->sample_period) { | ||
3939 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3940 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3941 | ns_to_ktime(period), 0, | ||
3942 | HRTIMER_MODE_REL, 0); | ||
3943 | } | ||
3944 | 4140 | ||
3945 | return 0; | 4141 | return 0; |
3946 | } | 4142 | } |
3947 | 4143 | ||
3948 | static void task_clock_perf_event_disable(struct perf_event *event) | 4144 | static void task_clock_perf_event_disable(struct perf_event *event) |
3949 | { | 4145 | { |
3950 | if (event->hw.sample_period) | 4146 | perf_swevent_cancel_hrtimer(event); |
3951 | hrtimer_cancel(&event->hw.hrtimer); | ||
3952 | task_clock_perf_event_update(event, event->ctx->time); | 4147 | task_clock_perf_event_update(event, event->ctx->time); |
3953 | 4148 | ||
3954 | } | 4149 | } |
@@ -3976,6 +4171,7 @@ static const struct pmu perf_ops_task_clock = { | |||
3976 | }; | 4171 | }; |
3977 | 4172 | ||
3978 | #ifdef CONFIG_EVENT_PROFILE | 4173 | #ifdef CONFIG_EVENT_PROFILE |
4174 | |||
3979 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4175 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
3980 | int entry_size) | 4176 | int entry_size) |
3981 | { | 4177 | { |
@@ -3994,13 +4190,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | |||
3994 | if (!regs) | 4190 | if (!regs) |
3995 | regs = task_pt_regs(current); | 4191 | regs = task_pt_regs(current); |
3996 | 4192 | ||
4193 | /* Trace events already protected against recursion */ | ||
3997 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, | 4194 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, |
3998 | &data, regs); | 4195 | &data, regs); |
3999 | } | 4196 | } |
4000 | EXPORT_SYMBOL_GPL(perf_tp_event); | 4197 | EXPORT_SYMBOL_GPL(perf_tp_event); |
4001 | 4198 | ||
4002 | extern int ftrace_profile_enable(int); | 4199 | static int perf_tp_event_match(struct perf_event *event, |
4003 | extern void ftrace_profile_disable(int); | 4200 | struct perf_sample_data *data) |
4201 | { | ||
4202 | void *record = data->raw->data; | ||
4203 | |||
4204 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4205 | return 1; | ||
4206 | return 0; | ||
4207 | } | ||
4004 | 4208 | ||
4005 | static void tp_perf_event_destroy(struct perf_event *event) | 4209 | static void tp_perf_event_destroy(struct perf_event *event) |
4006 | { | 4210 | { |
@@ -4025,11 +4229,99 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4025 | 4229 | ||
4026 | return &perf_ops_generic; | 4230 | return &perf_ops_generic; |
4027 | } | 4231 | } |
4232 | |||
4233 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||
4234 | { | ||
4235 | char *filter_str; | ||
4236 | int ret; | ||
4237 | |||
4238 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
4239 | return -EINVAL; | ||
4240 | |||
4241 | filter_str = strndup_user(arg, PAGE_SIZE); | ||
4242 | if (IS_ERR(filter_str)) | ||
4243 | return PTR_ERR(filter_str); | ||
4244 | |||
4245 | ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); | ||
4246 | |||
4247 | kfree(filter_str); | ||
4248 | return ret; | ||
4249 | } | ||
4250 | |||
4251 | static void perf_event_free_filter(struct perf_event *event) | ||
4252 | { | ||
4253 | ftrace_profile_free_filter(event); | ||
4254 | } | ||
4255 | |||
4028 | #else | 4256 | #else |
4257 | |||
4258 | static int perf_tp_event_match(struct perf_event *event, | ||
4259 | struct perf_sample_data *data) | ||
4260 | { | ||
4261 | return 1; | ||
4262 | } | ||
4263 | |||
4029 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4264 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
4030 | { | 4265 | { |
4031 | return NULL; | 4266 | return NULL; |
4032 | } | 4267 | } |
4268 | |||
4269 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||
4270 | { | ||
4271 | return -ENOENT; | ||
4272 | } | ||
4273 | |||
4274 | static void perf_event_free_filter(struct perf_event *event) | ||
4275 | { | ||
4276 | } | ||
4277 | |||
4278 | #endif /* CONFIG_EVENT_PROFILE */ | ||
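perf_event_set_filter() copies a filter string from user space and hands it to ftrace_profile_set_filter(), so a tracepoint counter only counts events whose fields match the expression. From user space this is reached through an ioctl on the event file descriptor; a hedged sketch, assuming fd was obtained with perf_event_open() for a PERF_TYPE_TRACEPOINT event and that PERF_EVENT_IOC_SET_FILTER (from linux/perf_event.h) is the ioctl wired up to this function:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* Attach an ftrace-style filter expression to an open tracepoint event. */
static int set_event_filter(int fd, const char *filter)
{
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter) < 0) {
		perror("PERF_EVENT_IOC_SET_FILTER");
		return -1;
	}
	return 0;
}

/* e.g. set_event_filter(fd, "bytes_req > 256") on a kmem:kmalloc event */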
4279 | |||
4280 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
4281 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4282 | { | ||
4283 | release_bp_slot(event); | ||
4284 | } | ||
4285 | |||
4286 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4287 | { | ||
4288 | int err; | ||
4289 | /* | ||
4290 | * The breakpoint fields are already filled in if the counter was not | ||
4291 | * created through the perf syscall. | ||
4292 | * FIXME: ensure the callback ("triggered") is NULL when the event comes from the syscall. | ||
4293 | */ | ||
4294 | if (!bp->callback) | ||
4295 | err = register_perf_hw_breakpoint(bp); | ||
4296 | else | ||
4297 | err = __register_perf_hw_breakpoint(bp); | ||
4298 | if (err) | ||
4299 | return ERR_PTR(err); | ||
4300 | |||
4301 | bp->destroy = bp_perf_event_destroy; | ||
4302 | |||
4303 | return &perf_ops_bp; | ||
4304 | } | ||
4305 | |||
4306 | void perf_bp_event(struct perf_event *bp, void *data) | ||
4307 | { | ||
4308 | struct perf_sample_data sample; | ||
4309 | struct pt_regs *regs = data; | ||
4310 | |||
4311 | sample.addr = bp->attr.bp_addr; | ||
4312 | |||
4313 | if (!perf_exclude_event(bp, regs)) | ||
4314 | perf_swevent_add(bp, 1, 1, &sample, regs); | ||
4315 | } | ||
4316 | #else | ||
4317 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4318 | { | ||
4319 | return NULL; | ||
4320 | } | ||
4321 | |||
4322 | void perf_bp_event(struct perf_event *bp, void *regs) | ||
4323 | { | ||
4324 | } | ||
4033 | #endif | 4325 | #endif |
4034 | 4326 | ||
4035 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4327 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
@@ -4076,6 +4368,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) | |||
4076 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | 4368 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: |
4077 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | 4369 | case PERF_COUNT_SW_CONTEXT_SWITCHES: |
4078 | case PERF_COUNT_SW_CPU_MIGRATIONS: | 4370 | case PERF_COUNT_SW_CPU_MIGRATIONS: |
4371 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | ||
4372 | case PERF_COUNT_SW_EMULATION_FAULTS: | ||
4079 | if (!event->parent) { | 4373 | if (!event->parent) { |
4080 | atomic_inc(&perf_swevent_enabled[event_id]); | 4374 | atomic_inc(&perf_swevent_enabled[event_id]); |
4081 | event->destroy = sw_perf_event_destroy; | 4375 | event->destroy = sw_perf_event_destroy; |
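sw_perf_event_init() now also accepts the two new generic software event ids, PERF_COUNT_SW_ALIGNMENT_FAULTS and PERF_COUNT_SW_EMULATION_FAULTS. A hedged user-space sketch that opens one of them as a plain counter for the calling thread (perf_event_open() has no libc wrapper, so the raw syscall is used; __NR_perf_event_open is assumed to be defined on the build host):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_alignment_fault_counter(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_ALIGNMENT_FAULTS;

	/* pid = 0 (current task), cpu = -1 (any), group_fd = -1, flags = 0 */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	uint64_t count;
	int fd = open_alignment_fault_counter();

	if (fd < 0 || read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;
	printf("alignment faults: %llu\n", (unsigned long long)count);
	return 0;
}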
@@ -4096,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4096 | struct perf_event_context *ctx, | 4390 | struct perf_event_context *ctx, |
4097 | struct perf_event *group_leader, | 4391 | struct perf_event *group_leader, |
4098 | struct perf_event *parent_event, | 4392 | struct perf_event *parent_event, |
4393 | perf_callback_t callback, | ||
4099 | gfp_t gfpflags) | 4394 | gfp_t gfpflags) |
4100 | { | 4395 | { |
4101 | const struct pmu *pmu; | 4396 | const struct pmu *pmu; |
@@ -4138,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4138 | 4433 | ||
4139 | event->state = PERF_EVENT_STATE_INACTIVE; | 4434 | event->state = PERF_EVENT_STATE_INACTIVE; |
4140 | 4435 | ||
4436 | if (!callback && parent_event) | ||
4437 | callback = parent_event->callback; | ||
4438 | |||
4439 | event->callback = callback; | ||
4440 | |||
4141 | if (attr->disabled) | 4441 | if (attr->disabled) |
4142 | event->state = PERF_EVENT_STATE_OFF; | 4442 | event->state = PERF_EVENT_STATE_OFF; |
4143 | 4443 | ||
@@ -4172,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4172 | pmu = tp_perf_event_init(event); | 4472 | pmu = tp_perf_event_init(event); |
4173 | break; | 4473 | break; |
4174 | 4474 | ||
4475 | case PERF_TYPE_BREAKPOINT: | ||
4476 | pmu = bp_perf_event_init(event); | ||
4477 | break; | ||
4478 | |||
4479 | |||
4175 | default: | 4480 | default: |
4176 | break; | 4481 | break; |
4177 | } | 4482 | } |
@@ -4284,7 +4589,7 @@ err_size: | |||
4284 | goto out; | 4589 | goto out; |
4285 | } | 4590 | } |
4286 | 4591 | ||
4287 | int perf_event_set_output(struct perf_event *event, int output_fd) | 4592 | static int perf_event_set_output(struct perf_event *event, int output_fd) |
4288 | { | 4593 | { |
4289 | struct perf_event *output_event = NULL; | 4594 | struct perf_event *output_event = NULL; |
4290 | struct file *output_file = NULL; | 4595 | struct file *output_file = NULL; |
@@ -4414,7 +4719,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
4414 | } | 4719 | } |
4415 | 4720 | ||
4416 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | 4721 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
4417 | NULL, GFP_KERNEL); | 4722 | NULL, NULL, GFP_KERNEL); |
4418 | err = PTR_ERR(event); | 4723 | err = PTR_ERR(event); |
4419 | if (IS_ERR(event)) | 4724 | if (IS_ERR(event)) |
4420 | goto err_put_context; | 4725 | goto err_put_context; |
@@ -4462,6 +4767,60 @@ err_put_context: | |||
4462 | return err; | 4767 | return err; |
4463 | } | 4768 | } |
4464 | 4769 | ||
4770 | /** | ||
4771 | * perf_event_create_kernel_counter - create a counter for use by kernel code | ||
4772 | * | ||
4773 | * @attr: attributes of the counter to create | ||
4774 | * @cpu: cpu to which the counter is bound | ||
4775 | * @pid: task to profile | ||
4776 | */ | ||
4777 | struct perf_event * | ||
4778 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | ||
4779 | pid_t pid, perf_callback_t callback) | ||
4780 | { | ||
4781 | struct perf_event *event; | ||
4782 | struct perf_event_context *ctx; | ||
4783 | int err; | ||
4784 | |||
4785 | /* | ||
4786 | * Get the target context (task or percpu): | ||
4787 | */ | ||
4788 | |||
4789 | ctx = find_get_context(pid, cpu); | ||
4790 | if (IS_ERR(ctx)) { | ||
4791 | err = PTR_ERR(ctx); | ||
4792 | goto err_exit; | ||
4793 | } | ||
4794 | |||
4795 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
4796 | NULL, callback, GFP_KERNEL); | ||
4797 | if (IS_ERR(event)) { | ||
4798 | err = PTR_ERR(event); | ||
4799 | goto err_put_context; | ||
4800 | } | ||
4801 | |||
4802 | event->filp = NULL; | ||
4803 | WARN_ON_ONCE(ctx->parent_ctx); | ||
4804 | mutex_lock(&ctx->mutex); | ||
4805 | perf_install_in_context(ctx, event, cpu); | ||
4806 | ++ctx->generation; | ||
4807 | mutex_unlock(&ctx->mutex); | ||
4808 | |||
4809 | event->owner = current; | ||
4810 | get_task_struct(current); | ||
4811 | mutex_lock(¤t->perf_event_mutex); | ||
4812 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
4813 | mutex_unlock(¤t->perf_event_mutex); | ||
4814 | |||
4815 | return event; | ||
4816 | |||
4817 | err_put_context: | ||
4818 | put_ctx(ctx); | ||
4819 | err_exit: | ||
4820 | return ERR_PTR(err); | ||
4821 | } | ||
4822 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | ||
4823 | |||
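perf_event_create_kernel_counter() is the in-kernel counterpart of the syscall path: it builds an event from an attr, binds it to a cpu or a task, installs it in the context, and hands the event back so kernel code (the hw-breakpoint layer above being the first user) can drive it directly. A hedged sketch of a caller, assuming the bp_* attr fields and the HW_BREAKPOINT_* constants from the companion hw-breakpoint patches, and treating the callback's second argument as opaque:

#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

/* Hypothetical handler; the shape is assumed to match perf_callback_t. */
static void sketch_wp_triggered(struct perf_event *bp, void *data)
{
	pr_info("write watchpoint at 0x%llx hit\n",
		(unsigned long long)bp->attr.bp_addr);
}

/* Place a write watchpoint on @addr for task @pid, on any cpu (-1).
 * The caller is expected to check the result with IS_ERR(). */
static struct perf_event *sketch_create_watchpoint(unsigned long addr, pid_t pid)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.bp_addr	= addr,
		.bp_type	= HW_BREAKPOINT_W,
		.bp_len		= HW_BREAKPOINT_LEN_4,
		.sample_period	= 1,
	};

	return perf_event_create_kernel_counter(&attr, -1, pid,
						sketch_wp_triggered);
}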
4465 | /* | 4824 | /* |
4466 | * inherit an event from parent task to child task: | 4825 | * inherit an event from parent task to child task: |
4467 | */ | 4826 | */ |
@@ -4487,7 +4846,7 @@ inherit_event(struct perf_event *parent_event, | |||
4487 | child_event = perf_event_alloc(&parent_event->attr, | 4846 | child_event = perf_event_alloc(&parent_event->attr, |
4488 | parent_event->cpu, child_ctx, | 4847 | parent_event->cpu, child_ctx, |
4489 | group_leader, parent_event, | 4848 | group_leader, parent_event, |
4490 | GFP_KERNEL); | 4849 | NULL, GFP_KERNEL); |
4491 | if (IS_ERR(child_event)) | 4850 | if (IS_ERR(child_event)) |
4492 | return child_event; | 4851 | return child_event; |
4493 | get_ctx(child_ctx); | 4852 | get_ctx(child_ctx); |
@@ -4505,6 +4864,8 @@ inherit_event(struct perf_event *parent_event, | |||
4505 | if (parent_event->attr.freq) | 4864 | if (parent_event->attr.freq) |
4506 | child_event->hw.sample_period = parent_event->hw.sample_period; | 4865 | child_event->hw.sample_period = parent_event->hw.sample_period; |
4507 | 4866 | ||
4867 | child_event->overflow_handler = parent_event->overflow_handler; | ||
4868 | |||
4508 | /* | 4869 | /* |
4509 | * Link it up in the child's context: | 4870 | * Link it up in the child's context: |
4510 | */ | 4871 | */ |
@@ -4594,7 +4955,6 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
4594 | { | 4955 | { |
4595 | struct perf_event *parent_event; | 4956 | struct perf_event *parent_event; |
4596 | 4957 | ||
4597 | update_event_times(child_event); | ||
4598 | perf_event_remove_from_context(child_event); | 4958 | perf_event_remove_from_context(child_event); |
4599 | 4959 | ||
4600 | parent_event = child_event->parent; | 4960 | parent_event = child_event->parent; |
@@ -4646,6 +5006,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
4646 | * the events from it. | 5006 | * the events from it. |
4647 | */ | 5007 | */ |
4648 | unclone_ctx(child_ctx); | 5008 | unclone_ctx(child_ctx); |
5009 | update_context_time(child_ctx); | ||
4649 | spin_unlock_irqrestore(&child_ctx->lock, flags); | 5010 | spin_unlock_irqrestore(&child_ctx->lock, flags); |
4650 | 5011 | ||
4651 | /* | 5012 | /* |
@@ -4781,9 +5142,7 @@ int perf_event_init_task(struct task_struct *child) | |||
4781 | * We don't have to disable NMIs - we are only looking at | 5142 | * We don't have to disable NMIs - we are only looking at |
4782 | * the list, not manipulating it: | 5143 | * the list, not manipulating it: |
4783 | */ | 5144 | */ |
4784 | list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) { | 5145 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { |
4785 | if (event != event->group_leader) | ||
4786 | continue; | ||
4787 | 5146 | ||
4788 | if (!event->attr.inherit) { | 5147 | if (!event->attr.inherit) { |
4789 | inherited_all = 0; | 5148 | inherited_all = 0; |