author     Linus Torvalds <torvalds@linux-foundation.org>  2011-07-22 19:44:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-07-22 19:44:39 -0400
commit     4d4abdcb1dee03a4f9d6d2021622ed07e14dfd17 (patch)
tree       4ed4c74b70240451065165fda5fb2059f8c6b1e5 /kernel
parent     0342cbcfced2ee937d7c8e1c63f3d3082da7c7dc (diff)
parent     7fcfd1abd6480d3b9ef17f5759c175e036e835cf (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (123 commits)
perf: Remove the nmi parameter from the oprofile_perf backend
x86, perf: Make copy_from_user_nmi() a library function
perf: Remove perf_event_attr::type check
x86, perf: P4 PMU - Fix typos in comments and style cleanup
perf tools: Make test use the preset debugfs path
perf tools: Add automated tests for events parsing
perf tools: De-opt the parse_events function
perf script: Fix display of IP address for non-callchain path
perf tools: Fix endian conversion reading event attr from file header
perf tools: Add missing 'node' alias to the hw_cache[] array
perf probe: Support adding probes on offline kernel modules
perf probe: Add probed module in front of function
perf probe: Introduce debuginfo to encapsulate dwarf information
perf-probe: Move dwarf library routines to dwarf-aux.{c, h}
perf probe: Remove redundant dwarf functions
perf probe: Move strtailcmp to string.c
perf probe: Rename DIE_FIND_CB_FOUND to DIE_FIND_CB_END
tracing/kprobe: Update symbol reference when loading module
tracing/kprobes: Support module init function probing
kprobes: Return -ENOENT if probe point doesn't exist
...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/async.c                        |  12
-rw-r--r--  kernel/events/Makefile                |   2
-rw-r--r--  kernel/events/core.c                  | 938
-rw-r--r--  kernel/events/hw_breakpoint.c         |  10
-rw-r--r--  kernel/events/internal.h              |  96
-rw-r--r--  kernel/events/ring_buffer.c           | 380
-rw-r--r--  kernel/kprobes.c                      |  33
-rw-r--r--  kernel/sched.c                        |   2
-rw-r--r--  kernel/stacktrace.c                   |  12
-rw-r--r--  kernel/trace/ftrace.c                 | 157
-rw-r--r--  kernel/trace/ring_buffer.c            |  66
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  |   2
-rw-r--r--  kernel/trace/trace.c                  | 326
-rw-r--r--  kernel/trace/trace.h                  |  61
-rw-r--r--  kernel/trace/trace_entries.h          |   3
-rw-r--r--  kernel/trace/trace_events.c           | 139
-rw-r--r--  kernel/trace/trace_events_filter.c    |   6
-rw-r--r--  kernel/trace/trace_functions.c        |   3
-rw-r--r--  kernel/trace/trace_functions_graph.c  | 225
-rw-r--r--  kernel/trace/trace_irqsoff.c          |   4
-rw-r--r--  kernel/trace/trace_kprobe.c           | 324
-rw-r--r--  kernel/trace/trace_output.c           |  11
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |   4
-rw-r--r--  kernel/trace/trace_stack.c            |  13
-rw-r--r--  kernel/watchdog.c                     |   8
25 files changed, 1655 insertions, 1182 deletions
diff --git a/kernel/async.c b/kernel/async.c
index cd9dbb913c77..d5fe7af0de2e 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel.
  */
 
 #include <linux/async.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <asm/atomic.h>
 
 static async_cookie_t next_cookie = 1;
 
@@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work)
 
 	/* 2) run (and print duration) */
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
+		printk(KERN_DEBUG "calling %lli_%pF @ %i\n",
+			(long long)entry->cookie,
 			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
@@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work)
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
-		printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+		printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n",
 			(long long)entry->cookie,
 			entry->func,
 			(long long)ktime_to_ns(delta) >> 10);
@@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
 	ktime_t starttime, delta, endtime;
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		printk("async_waiting @ %i\n", task_pid_nr(current));
+		printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));
 		starttime = ktime_get();
 	}
 
@@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
 		endtime = ktime_get();
 		delta = ktime_sub(endtime, starttime);
 
-		printk("async_continuing @ %i after %lli usec\n",
+		printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n",
 			task_pid_nr(current),
 			(long long)ktime_to_ns(delta) >> 10);
 	}
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 1ce23d3d8394..89e5e8aa4c36 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_core.o = -pg
 endif
 
-obj-y := core.o
+obj-y := core.o ring_buffer.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/kernel/events/core.c b/kernel/events/core.c index 9efe7108ccaf..b8785e26ee1c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -36,6 +36,8 @@ | |||
36 | #include <linux/ftrace_event.h> | 36 | #include <linux/ftrace_event.h> |
37 | #include <linux/hw_breakpoint.h> | 37 | #include <linux/hw_breakpoint.h> |
38 | 38 | ||
39 | #include "internal.h" | ||
40 | |||
39 | #include <asm/irq_regs.h> | 41 | #include <asm/irq_regs.h> |
40 | 42 | ||
41 | struct remote_function_call { | 43 | struct remote_function_call { |
@@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx) | |||
200 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); | 202 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); |
201 | } | 203 | } |
202 | 204 | ||
205 | static void perf_ctx_lock(struct perf_cpu_context *cpuctx, | ||
206 | struct perf_event_context *ctx) | ||
207 | { | ||
208 | raw_spin_lock(&cpuctx->ctx.lock); | ||
209 | if (ctx) | ||
210 | raw_spin_lock(&ctx->lock); | ||
211 | } | ||
212 | |||
213 | static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, | ||
214 | struct perf_event_context *ctx) | ||
215 | { | ||
216 | if (ctx) | ||
217 | raw_spin_unlock(&ctx->lock); | ||
218 | raw_spin_unlock(&cpuctx->ctx.lock); | ||
219 | } | ||
220 | |||
203 | #ifdef CONFIG_CGROUP_PERF | 221 | #ifdef CONFIG_CGROUP_PERF |
204 | 222 | ||
205 | /* | 223 | /* |
@@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
340 | rcu_read_lock(); | 358 | rcu_read_lock(); |
341 | 359 | ||
342 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 360 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
343 | |||
344 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 361 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
345 | 362 | ||
346 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
347 | |||
348 | /* | 363 | /* |
349 | * perf_cgroup_events says at least one | 364 | * perf_cgroup_events says at least one |
350 | * context on this CPU has cgroup events. | 365 | * context on this CPU has cgroup events. |
@@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
353 | * events for a context. | 368 | * events for a context. |
354 | */ | 369 | */ |
355 | if (cpuctx->ctx.nr_cgroups > 0) { | 370 | if (cpuctx->ctx.nr_cgroups > 0) { |
371 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
372 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
356 | 373 | ||
357 | if (mode & PERF_CGROUP_SWOUT) { | 374 | if (mode & PERF_CGROUP_SWOUT) { |
358 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); | 375 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); |
@@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
372 | cpuctx->cgrp = perf_cgroup_from_task(task); | 389 | cpuctx->cgrp = perf_cgroup_from_task(task); |
373 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); | 390 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); |
374 | } | 391 | } |
392 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
393 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
375 | } | 394 | } |
376 | |||
377 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
378 | } | 395 | } |
379 | 396 | ||
380 | rcu_read_unlock(); | 397 | rcu_read_unlock(); |
@@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event) | |||
731 | 748 | ||
732 | /* | 749 | /* |
733 | * Update the total_time_enabled and total_time_running fields for a event. | 750 | * Update the total_time_enabled and total_time_running fields for a event. |
751 | * The caller of this function needs to hold the ctx->lock. | ||
734 | */ | 752 | */ |
735 | static void update_event_times(struct perf_event *event) | 753 | static void update_event_times(struct perf_event *event) |
736 | { | 754 | { |
@@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info) | |||
1105 | raw_spin_lock(&ctx->lock); | 1123 | raw_spin_lock(&ctx->lock); |
1106 | event_sched_out(event, cpuctx, ctx); | 1124 | event_sched_out(event, cpuctx, ctx); |
1107 | list_del_event(event, ctx); | 1125 | list_del_event(event, ctx); |
1126 | if (!ctx->nr_events && cpuctx->task_ctx == ctx) { | ||
1127 | ctx->is_active = 0; | ||
1128 | cpuctx->task_ctx = NULL; | ||
1129 | } | ||
1108 | raw_spin_unlock(&ctx->lock); | 1130 | raw_spin_unlock(&ctx->lock); |
1109 | 1131 | ||
1110 | return 0; | 1132 | return 0; |
@@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event, | |||
1454 | event->tstamp_stopped = tstamp; | 1476 | event->tstamp_stopped = tstamp; |
1455 | } | 1477 | } |
1456 | 1478 | ||
1457 | static void perf_event_context_sched_in(struct perf_event_context *ctx, | 1479 | static void task_ctx_sched_out(struct perf_event_context *ctx); |
1458 | struct task_struct *tsk); | 1480 | static void |
1481 | ctx_sched_in(struct perf_event_context *ctx, | ||
1482 | struct perf_cpu_context *cpuctx, | ||
1483 | enum event_type_t event_type, | ||
1484 | struct task_struct *task); | ||
1485 | |||
1486 | static void perf_event_sched_in(struct perf_cpu_context *cpuctx, | ||
1487 | struct perf_event_context *ctx, | ||
1488 | struct task_struct *task) | ||
1489 | { | ||
1490 | cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); | ||
1491 | if (ctx) | ||
1492 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); | ||
1493 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); | ||
1494 | if (ctx) | ||
1495 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); | ||
1496 | } | ||
1459 | 1497 | ||
1460 | /* | 1498 | /* |
1461 | * Cross CPU call to install and enable a performance event | 1499 | * Cross CPU call to install and enable a performance event |
@@ -1466,20 +1504,37 @@ static int __perf_install_in_context(void *info) | |||
1466 | { | 1504 | { |
1467 | struct perf_event *event = info; | 1505 | struct perf_event *event = info; |
1468 | struct perf_event_context *ctx = event->ctx; | 1506 | struct perf_event_context *ctx = event->ctx; |
1469 | struct perf_event *leader = event->group_leader; | ||
1470 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1507 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1471 | int err; | 1508 | struct perf_event_context *task_ctx = cpuctx->task_ctx; |
1509 | struct task_struct *task = current; | ||
1510 | |||
1511 | perf_ctx_lock(cpuctx, task_ctx); | ||
1512 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
1472 | 1513 | ||
1473 | /* | 1514 | /* |
1474 | * In case we're installing a new context to an already running task, | 1515 | * If there was an active task_ctx schedule it out. |
1475 | * could also happen before perf_event_task_sched_in() on architectures | ||
1476 | * which do context switches with IRQs enabled. | ||
1477 | */ | 1516 | */ |
1478 | if (ctx->task && !cpuctx->task_ctx) | 1517 | if (task_ctx) |
1479 | perf_event_context_sched_in(ctx, ctx->task); | 1518 | task_ctx_sched_out(task_ctx); |
1519 | |||
1520 | /* | ||
1521 | * If the context we're installing events in is not the | ||
1522 | * active task_ctx, flip them. | ||
1523 | */ | ||
1524 | if (ctx->task && task_ctx != ctx) { | ||
1525 | if (task_ctx) | ||
1526 | raw_spin_unlock(&task_ctx->lock); | ||
1527 | raw_spin_lock(&ctx->lock); | ||
1528 | task_ctx = ctx; | ||
1529 | } | ||
1530 | |||
1531 | if (task_ctx) { | ||
1532 | cpuctx->task_ctx = task_ctx; | ||
1533 | task = task_ctx->task; | ||
1534 | } | ||
1535 | |||
1536 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); | ||
1480 | 1537 | ||
1481 | raw_spin_lock(&ctx->lock); | ||
1482 | ctx->is_active = 1; | ||
1483 | update_context_time(ctx); | 1538 | update_context_time(ctx); |
1484 | /* | 1539 | /* |
1485 | * update cgrp time only if current cgrp | 1540 | * update cgrp time only if current cgrp |
@@ -1490,43 +1545,13 @@ static int __perf_install_in_context(void *info) | |||
1490 | 1545 | ||
1491 | add_event_to_ctx(event, ctx); | 1546 | add_event_to_ctx(event, ctx); |
1492 | 1547 | ||
1493 | if (!event_filter_match(event)) | ||
1494 | goto unlock; | ||
1495 | |||
1496 | /* | ||
1497 | * Don't put the event on if it is disabled or if | ||
1498 | * it is in a group and the group isn't on. | ||
1499 | */ | ||
1500 | if (event->state != PERF_EVENT_STATE_INACTIVE || | ||
1501 | (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) | ||
1502 | goto unlock; | ||
1503 | |||
1504 | /* | 1548 | /* |
1505 | * An exclusive event can't go on if there are already active | 1549 | * Schedule everything back in |
1506 | * hardware events, and no hardware event can go on if there | ||
1507 | * is already an exclusive event on. | ||
1508 | */ | 1550 | */ |
1509 | if (!group_can_go_on(event, cpuctx, 1)) | 1551 | perf_event_sched_in(cpuctx, task_ctx, task); |
1510 | err = -EEXIST; | ||
1511 | else | ||
1512 | err = event_sched_in(event, cpuctx, ctx); | ||
1513 | |||
1514 | if (err) { | ||
1515 | /* | ||
1516 | * This event couldn't go on. If it is in a group | ||
1517 | * then we have to pull the whole group off. | ||
1518 | * If the event group is pinned then put it in error state. | ||
1519 | */ | ||
1520 | if (leader != event) | ||
1521 | group_sched_out(leader, cpuctx, ctx); | ||
1522 | if (leader->attr.pinned) { | ||
1523 | update_group_times(leader); | ||
1524 | leader->state = PERF_EVENT_STATE_ERROR; | ||
1525 | } | ||
1526 | } | ||
1527 | 1552 | ||
1528 | unlock: | 1553 | perf_pmu_enable(cpuctx->ctx.pmu); |
1529 | raw_spin_unlock(&ctx->lock); | 1554 | perf_ctx_unlock(cpuctx, task_ctx); |
1530 | 1555 | ||
1531 | return 0; | 1556 | return 0; |
1532 | } | 1557 | } |
@@ -1739,7 +1764,7 @@ out: | |||
1739 | raw_spin_unlock_irq(&ctx->lock); | 1764 | raw_spin_unlock_irq(&ctx->lock); |
1740 | } | 1765 | } |
1741 | 1766 | ||
1742 | static int perf_event_refresh(struct perf_event *event, int refresh) | 1767 | int perf_event_refresh(struct perf_event *event, int refresh) |
1743 | { | 1768 | { |
1744 | /* | 1769 | /* |
1745 | * not supported on inherited events | 1770 | * not supported on inherited events |
@@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1752 | 1777 | ||
1753 | return 0; | 1778 | return 0; |
1754 | } | 1779 | } |
1780 | EXPORT_SYMBOL_GPL(perf_event_refresh); | ||
1755 | 1781 | ||
1756 | static void ctx_sched_out(struct perf_event_context *ctx, | 1782 | static void ctx_sched_out(struct perf_event_context *ctx, |
1757 | struct perf_cpu_context *cpuctx, | 1783 | struct perf_cpu_context *cpuctx, |
1758 | enum event_type_t event_type) | 1784 | enum event_type_t event_type) |
1759 | { | 1785 | { |
1760 | struct perf_event *event; | 1786 | struct perf_event *event; |
1787 | int is_active = ctx->is_active; | ||
1761 | 1788 | ||
1762 | raw_spin_lock(&ctx->lock); | 1789 | ctx->is_active &= ~event_type; |
1763 | perf_pmu_disable(ctx->pmu); | ||
1764 | ctx->is_active = 0; | ||
1765 | if (likely(!ctx->nr_events)) | 1790 | if (likely(!ctx->nr_events)) |
1766 | goto out; | 1791 | return; |
1792 | |||
1767 | update_context_time(ctx); | 1793 | update_context_time(ctx); |
1768 | update_cgrp_time_from_cpuctx(cpuctx); | 1794 | update_cgrp_time_from_cpuctx(cpuctx); |
1769 | |||
1770 | if (!ctx->nr_active) | 1795 | if (!ctx->nr_active) |
1771 | goto out; | 1796 | return; |
1772 | 1797 | ||
1773 | if (event_type & EVENT_PINNED) { | 1798 | perf_pmu_disable(ctx->pmu); |
1799 | if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) { | ||
1774 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | 1800 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) |
1775 | group_sched_out(event, cpuctx, ctx); | 1801 | group_sched_out(event, cpuctx, ctx); |
1776 | } | 1802 | } |
1777 | 1803 | ||
1778 | if (event_type & EVENT_FLEXIBLE) { | 1804 | if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) { |
1779 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | 1805 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) |
1780 | group_sched_out(event, cpuctx, ctx); | 1806 | group_sched_out(event, cpuctx, ctx); |
1781 | } | 1807 | } |
1782 | out: | ||
1783 | perf_pmu_enable(ctx->pmu); | 1808 | perf_pmu_enable(ctx->pmu); |
1784 | raw_spin_unlock(&ctx->lock); | ||
1785 | } | 1809 | } |
1786 | 1810 | ||
1787 | /* | 1811 | /* |
@@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, | |||
1929 | rcu_read_unlock(); | 1953 | rcu_read_unlock(); |
1930 | 1954 | ||
1931 | if (do_switch) { | 1955 | if (do_switch) { |
1956 | raw_spin_lock(&ctx->lock); | ||
1932 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); | 1957 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
1933 | cpuctx->task_ctx = NULL; | 1958 | cpuctx->task_ctx = NULL; |
1959 | raw_spin_unlock(&ctx->lock); | ||
1934 | } | 1960 | } |
1935 | } | 1961 | } |
1936 | 1962 | ||
@@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1965 | perf_cgroup_sched_out(task); | 1991 | perf_cgroup_sched_out(task); |
1966 | } | 1992 | } |
1967 | 1993 | ||
1968 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1994 | static void task_ctx_sched_out(struct perf_event_context *ctx) |
1969 | enum event_type_t event_type) | ||
1970 | { | 1995 | { |
1971 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1996 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1972 | 1997 | ||
@@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx, | |||
1976 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | 2001 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) |
1977 | return; | 2002 | return; |
1978 | 2003 | ||
1979 | ctx_sched_out(ctx, cpuctx, event_type); | 2004 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
1980 | cpuctx->task_ctx = NULL; | 2005 | cpuctx->task_ctx = NULL; |
1981 | } | 2006 | } |
1982 | 2007 | ||
@@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
2055 | struct task_struct *task) | 2080 | struct task_struct *task) |
2056 | { | 2081 | { |
2057 | u64 now; | 2082 | u64 now; |
2083 | int is_active = ctx->is_active; | ||
2058 | 2084 | ||
2059 | raw_spin_lock(&ctx->lock); | 2085 | ctx->is_active |= event_type; |
2060 | ctx->is_active = 1; | ||
2061 | if (likely(!ctx->nr_events)) | 2086 | if (likely(!ctx->nr_events)) |
2062 | goto out; | 2087 | return; |
2063 | 2088 | ||
2064 | now = perf_clock(); | 2089 | now = perf_clock(); |
2065 | ctx->timestamp = now; | 2090 | ctx->timestamp = now; |
@@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
2068 | * First go through the list and put on any pinned groups | 2093 | * First go through the list and put on any pinned groups |
2069 | * in order to give them the best chance of going on. | 2094 | * in order to give them the best chance of going on. |
2070 | */ | 2095 | */ |
2071 | if (event_type & EVENT_PINNED) | 2096 | if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) |
2072 | ctx_pinned_sched_in(ctx, cpuctx); | 2097 | ctx_pinned_sched_in(ctx, cpuctx); |
2073 | 2098 | ||
2074 | /* Then walk through the lower prio flexible groups */ | 2099 | /* Then walk through the lower prio flexible groups */ |
2075 | if (event_type & EVENT_FLEXIBLE) | 2100 | if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) |
2076 | ctx_flexible_sched_in(ctx, cpuctx); | 2101 | ctx_flexible_sched_in(ctx, cpuctx); |
2077 | |||
2078 | out: | ||
2079 | raw_spin_unlock(&ctx->lock); | ||
2080 | } | 2102 | } |
2081 | 2103 | ||
2082 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | 2104 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, |
@@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
2088 | ctx_sched_in(ctx, cpuctx, event_type, task); | 2110 | ctx_sched_in(ctx, cpuctx, event_type, task); |
2089 | } | 2111 | } |
2090 | 2112 | ||
2091 | static void task_ctx_sched_in(struct perf_event_context *ctx, | ||
2092 | enum event_type_t event_type) | ||
2093 | { | ||
2094 | struct perf_cpu_context *cpuctx; | ||
2095 | |||
2096 | cpuctx = __get_cpu_context(ctx); | ||
2097 | if (cpuctx->task_ctx == ctx) | ||
2098 | return; | ||
2099 | |||
2100 | ctx_sched_in(ctx, cpuctx, event_type, NULL); | ||
2101 | cpuctx->task_ctx = ctx; | ||
2102 | } | ||
2103 | |||
2104 | static void perf_event_context_sched_in(struct perf_event_context *ctx, | 2113 | static void perf_event_context_sched_in(struct perf_event_context *ctx, |
2105 | struct task_struct *task) | 2114 | struct task_struct *task) |
2106 | { | 2115 | { |
@@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2110 | if (cpuctx->task_ctx == ctx) | 2119 | if (cpuctx->task_ctx == ctx) |
2111 | return; | 2120 | return; |
2112 | 2121 | ||
2122 | perf_ctx_lock(cpuctx, ctx); | ||
2113 | perf_pmu_disable(ctx->pmu); | 2123 | perf_pmu_disable(ctx->pmu); |
2114 | /* | 2124 | /* |
2115 | * We want to keep the following priority order: | 2125 | * We want to keep the following priority order: |
@@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2118 | */ | 2128 | */ |
2119 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 2129 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
2120 | 2130 | ||
2121 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); | 2131 | perf_event_sched_in(cpuctx, ctx, task); |
2122 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); | ||
2123 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); | ||
2124 | 2132 | ||
2125 | cpuctx->task_ctx = ctx; | 2133 | cpuctx->task_ctx = ctx; |
2126 | 2134 | ||
2135 | perf_pmu_enable(ctx->pmu); | ||
2136 | perf_ctx_unlock(cpuctx, ctx); | ||
2137 | |||
2127 | /* | 2138 | /* |
2128 | * Since these rotations are per-cpu, we need to ensure the | 2139 | * Since these rotations are per-cpu, we need to ensure the |
2129 | * cpu-context we got scheduled on is actually rotating. | 2140 | * cpu-context we got scheduled on is actually rotating. |
2130 | */ | 2141 | */ |
2131 | perf_pmu_rotate_start(ctx->pmu); | 2142 | perf_pmu_rotate_start(ctx->pmu); |
2132 | perf_pmu_enable(ctx->pmu); | ||
2133 | } | 2143 | } |
2134 | 2144 | ||
2135 | /* | 2145 | /* |
@@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
2269 | u64 interrupts, now; | 2279 | u64 interrupts, now; |
2270 | s64 delta; | 2280 | s64 delta; |
2271 | 2281 | ||
2272 | raw_spin_lock(&ctx->lock); | ||
2273 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 2282 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
2274 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 2283 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
2275 | continue; | 2284 | continue; |
@@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
2301 | if (delta > 0) | 2310 | if (delta > 0) |
2302 | perf_adjust_period(event, period, delta); | 2311 | perf_adjust_period(event, period, delta); |
2303 | } | 2312 | } |
2304 | raw_spin_unlock(&ctx->lock); | ||
2305 | } | 2313 | } |
2306 | 2314 | ||
2307 | /* | 2315 | /* |
@@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
2309 | */ | 2317 | */ |
2310 | static void rotate_ctx(struct perf_event_context *ctx) | 2318 | static void rotate_ctx(struct perf_event_context *ctx) |
2311 | { | 2319 | { |
2312 | raw_spin_lock(&ctx->lock); | ||
2313 | |||
2314 | /* | 2320 | /* |
2315 | * Rotate the first entry last of non-pinned groups. Rotation might be | 2321 | * Rotate the first entry last of non-pinned groups. Rotation might be |
2316 | * disabled by the inheritance code. | 2322 | * disabled by the inheritance code. |
2317 | */ | 2323 | */ |
2318 | if (!ctx->rotate_disable) | 2324 | if (!ctx->rotate_disable) |
2319 | list_rotate_left(&ctx->flexible_groups); | 2325 | list_rotate_left(&ctx->flexible_groups); |
2320 | |||
2321 | raw_spin_unlock(&ctx->lock); | ||
2322 | } | 2326 | } |
2323 | 2327 | ||
2324 | /* | 2328 | /* |
@@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
2345 | rotate = 1; | 2349 | rotate = 1; |
2346 | } | 2350 | } |
2347 | 2351 | ||
2352 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
2348 | perf_pmu_disable(cpuctx->ctx.pmu); | 2353 | perf_pmu_disable(cpuctx->ctx.pmu); |
2349 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); | 2354 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); |
2350 | if (ctx) | 2355 | if (ctx) |
@@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
2355 | 2360 | ||
2356 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 2361 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
2357 | if (ctx) | 2362 | if (ctx) |
2358 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); | 2363 | ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); |
2359 | 2364 | ||
2360 | rotate_ctx(&cpuctx->ctx); | 2365 | rotate_ctx(&cpuctx->ctx); |
2361 | if (ctx) | 2366 | if (ctx) |
2362 | rotate_ctx(ctx); | 2367 | rotate_ctx(ctx); |
2363 | 2368 | ||
2364 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); | 2369 | perf_event_sched_in(cpuctx, ctx, current); |
2365 | if (ctx) | ||
2366 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); | ||
2367 | 2370 | ||
2368 | done: | 2371 | done: |
2369 | if (remove) | 2372 | if (remove) |
2370 | list_del_init(&cpuctx->rotation_list); | 2373 | list_del_init(&cpuctx->rotation_list); |
2371 | 2374 | ||
2372 | perf_pmu_enable(cpuctx->ctx.pmu); | 2375 | perf_pmu_enable(cpuctx->ctx.pmu); |
2376 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
2373 | } | 2377 | } |
2374 | 2378 | ||
2375 | void perf_event_task_tick(void) | 2379 | void perf_event_task_tick(void) |
@@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
2424 | * in. | 2428 | * in. |
2425 | */ | 2429 | */ |
2426 | perf_cgroup_sched_out(current); | 2430 | perf_cgroup_sched_out(current); |
2427 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
2428 | 2431 | ||
2429 | raw_spin_lock(&ctx->lock); | 2432 | raw_spin_lock(&ctx->lock); |
2433 | task_ctx_sched_out(ctx); | ||
2430 | 2434 | ||
2431 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { | 2435 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
2432 | ret = event_enable_on_exec(event, ctx); | 2436 | ret = event_enable_on_exec(event, ctx); |
@@ -2835,16 +2839,12 @@ retry: | |||
2835 | unclone_ctx(ctx); | 2839 | unclone_ctx(ctx); |
2836 | ++ctx->pin_count; | 2840 | ++ctx->pin_count; |
2837 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2841 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
2838 | } | 2842 | } else { |
2839 | |||
2840 | if (!ctx) { | ||
2841 | ctx = alloc_perf_context(pmu, task); | 2843 | ctx = alloc_perf_context(pmu, task); |
2842 | err = -ENOMEM; | 2844 | err = -ENOMEM; |
2843 | if (!ctx) | 2845 | if (!ctx) |
2844 | goto errout; | 2846 | goto errout; |
2845 | 2847 | ||
2846 | get_ctx(ctx); | ||
2847 | |||
2848 | err = 0; | 2848 | err = 0; |
2849 | mutex_lock(&task->perf_event_mutex); | 2849 | mutex_lock(&task->perf_event_mutex); |
2850 | /* | 2850 | /* |
@@ -2856,14 +2856,14 @@ retry: | |||
2856 | else if (task->perf_event_ctxp[ctxn]) | 2856 | else if (task->perf_event_ctxp[ctxn]) |
2857 | err = -EAGAIN; | 2857 | err = -EAGAIN; |
2858 | else { | 2858 | else { |
2859 | get_ctx(ctx); | ||
2859 | ++ctx->pin_count; | 2860 | ++ctx->pin_count; |
2860 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | 2861 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); |
2861 | } | 2862 | } |
2862 | mutex_unlock(&task->perf_event_mutex); | 2863 | mutex_unlock(&task->perf_event_mutex); |
2863 | 2864 | ||
2864 | if (unlikely(err)) { | 2865 | if (unlikely(err)) { |
2865 | put_task_struct(task); | 2866 | put_ctx(ctx); |
2866 | kfree(ctx); | ||
2867 | 2867 | ||
2868 | if (err == -EAGAIN) | 2868 | if (err == -EAGAIN) |
2869 | goto retry; | 2869 | goto retry; |
@@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
2890 | kfree(event); | 2890 | kfree(event); |
2891 | } | 2891 | } |
2892 | 2892 | ||
2893 | static void perf_buffer_put(struct perf_buffer *buffer); | 2893 | static void ring_buffer_put(struct ring_buffer *rb); |
2894 | 2894 | ||
2895 | static void free_event(struct perf_event *event) | 2895 | static void free_event(struct perf_event *event) |
2896 | { | 2896 | { |
@@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event) | |||
2913 | } | 2913 | } |
2914 | } | 2914 | } |
2915 | 2915 | ||
2916 | if (event->buffer) { | 2916 | if (event->rb) { |
2917 | perf_buffer_put(event->buffer); | 2917 | ring_buffer_put(event->rb); |
2918 | event->buffer = NULL; | 2918 | event->rb = NULL; |
2919 | } | 2919 | } |
2920 | 2920 | ||
2921 | if (is_cgroup_event(event)) | 2921 | if (is_cgroup_event(event)) |
@@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2934 | { | 2934 | { |
2935 | struct perf_event_context *ctx = event->ctx; | 2935 | struct perf_event_context *ctx = event->ctx; |
2936 | 2936 | ||
2937 | /* | ||
2938 | * Remove from the PMU, can't get re-enabled since we got | ||
2939 | * here because the last ref went. | ||
2940 | */ | ||
2941 | perf_event_disable(event); | ||
2942 | |||
2943 | WARN_ON_ONCE(ctx->parent_ctx); | 2937 | WARN_ON_ONCE(ctx->parent_ctx); |
2944 | /* | 2938 | /* |
2945 | * There are two ways this annotation is useful: | 2939 | * There are two ways this annotation is useful: |
@@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2956 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 2950 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); |
2957 | raw_spin_lock_irq(&ctx->lock); | 2951 | raw_spin_lock_irq(&ctx->lock); |
2958 | perf_group_detach(event); | 2952 | perf_group_detach(event); |
2959 | list_del_event(event, ctx); | ||
2960 | raw_spin_unlock_irq(&ctx->lock); | 2953 | raw_spin_unlock_irq(&ctx->lock); |
2954 | perf_remove_from_context(event); | ||
2961 | mutex_unlock(&ctx->mutex); | 2955 | mutex_unlock(&ctx->mutex); |
2962 | 2956 | ||
2963 | free_event(event); | 2957 | free_event(event); |
@@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
3149 | static unsigned int perf_poll(struct file *file, poll_table *wait) | 3143 | static unsigned int perf_poll(struct file *file, poll_table *wait) |
3150 | { | 3144 | { |
3151 | struct perf_event *event = file->private_data; | 3145 | struct perf_event *event = file->private_data; |
3152 | struct perf_buffer *buffer; | 3146 | struct ring_buffer *rb; |
3153 | unsigned int events = POLL_HUP; | 3147 | unsigned int events = POLL_HUP; |
3154 | 3148 | ||
3155 | rcu_read_lock(); | 3149 | rcu_read_lock(); |
3156 | buffer = rcu_dereference(event->buffer); | 3150 | rb = rcu_dereference(event->rb); |
3157 | if (buffer) | 3151 | if (rb) |
3158 | events = atomic_xchg(&buffer->poll, 0); | 3152 | events = atomic_xchg(&rb->poll, 0); |
3159 | rcu_read_unlock(); | 3153 | rcu_read_unlock(); |
3160 | 3154 | ||
3161 | poll_wait(file, &event->waitq, wait); | 3155 | poll_wait(file, &event->waitq, wait); |
@@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event) | |||
3358 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; | 3352 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; |
3359 | } | 3353 | } |
3360 | 3354 | ||
3355 | static void calc_timer_values(struct perf_event *event, | ||
3356 | u64 *running, | ||
3357 | u64 *enabled) | ||
3358 | { | ||
3359 | u64 now, ctx_time; | ||
3360 | |||
3361 | now = perf_clock(); | ||
3362 | ctx_time = event->shadow_ctx_time + now; | ||
3363 | *enabled = ctx_time - event->tstamp_enabled; | ||
3364 | *running = ctx_time - event->tstamp_running; | ||
3365 | } | ||
3366 | |||
3361 | /* | 3367 | /* |
3362 | * Callers need to ensure there can be no nesting of this function, otherwise | 3368 | * Callers need to ensure there can be no nesting of this function, otherwise |
3363 | * the seqlock logic goes bad. We can not serialize this because the arch | 3369 | * the seqlock logic goes bad. We can not serialize this because the arch |
@@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event) | |||
3366 | void perf_event_update_userpage(struct perf_event *event) | 3372 | void perf_event_update_userpage(struct perf_event *event) |
3367 | { | 3373 | { |
3368 | struct perf_event_mmap_page *userpg; | 3374 | struct perf_event_mmap_page *userpg; |
3369 | struct perf_buffer *buffer; | 3375 | struct ring_buffer *rb; |
3376 | u64 enabled, running; | ||
3370 | 3377 | ||
3371 | rcu_read_lock(); | 3378 | rcu_read_lock(); |
3372 | buffer = rcu_dereference(event->buffer); | 3379 | /* |
3373 | if (!buffer) | 3380 | * compute total_time_enabled, total_time_running |
3381 | * based on snapshot values taken when the event | ||
3382 | * was last scheduled in. | ||
3383 | * | ||
3384 | * we cannot simply called update_context_time() | ||
3385 | * because of locking issue as we can be called in | ||
3386 | * NMI context | ||
3387 | */ | ||
3388 | calc_timer_values(event, &enabled, &running); | ||
3389 | rb = rcu_dereference(event->rb); | ||
3390 | if (!rb) | ||
3374 | goto unlock; | 3391 | goto unlock; |
3375 | 3392 | ||
3376 | userpg = buffer->user_page; | 3393 | userpg = rb->user_page; |
3377 | 3394 | ||
3378 | /* | 3395 | /* |
3379 | * Disable preemption so as to not let the corresponding user-space | 3396 | * Disable preemption so as to not let the corresponding user-space |
@@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3387 | if (event->state == PERF_EVENT_STATE_ACTIVE) | 3404 | if (event->state == PERF_EVENT_STATE_ACTIVE) |
3388 | userpg->offset -= local64_read(&event->hw.prev_count); | 3405 | userpg->offset -= local64_read(&event->hw.prev_count); |
3389 | 3406 | ||
3390 | userpg->time_enabled = event->total_time_enabled + | 3407 | userpg->time_enabled = enabled + |
3391 | atomic64_read(&event->child_total_time_enabled); | 3408 | atomic64_read(&event->child_total_time_enabled); |
3392 | 3409 | ||
3393 | userpg->time_running = event->total_time_running + | 3410 | userpg->time_running = running + |
3394 | atomic64_read(&event->child_total_time_running); | 3411 | atomic64_read(&event->child_total_time_running); |
3395 | 3412 | ||
3396 | barrier(); | 3413 | barrier(); |
@@ -3400,220 +3417,10 @@ unlock: | |||
3400 | rcu_read_unlock(); | 3417 | rcu_read_unlock(); |
3401 | } | 3418 | } |
3402 | 3419 | ||
3403 | static unsigned long perf_data_size(struct perf_buffer *buffer); | ||
3404 | |||
3405 | static void | ||
3406 | perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags) | ||
3407 | { | ||
3408 | long max_size = perf_data_size(buffer); | ||
3409 | |||
3410 | if (watermark) | ||
3411 | buffer->watermark = min(max_size, watermark); | ||
3412 | |||
3413 | if (!buffer->watermark) | ||
3414 | buffer->watermark = max_size / 2; | ||
3415 | |||
3416 | if (flags & PERF_BUFFER_WRITABLE) | ||
3417 | buffer->writable = 1; | ||
3418 | |||
3419 | atomic_set(&buffer->refcount, 1); | ||
3420 | } | ||
3421 | |||
3422 | #ifndef CONFIG_PERF_USE_VMALLOC | ||
3423 | |||
3424 | /* | ||
3425 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. | ||
3426 | */ | ||
3427 | |||
3428 | static struct page * | ||
3429 | perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) | ||
3430 | { | ||
3431 | if (pgoff > buffer->nr_pages) | ||
3432 | return NULL; | ||
3433 | |||
3434 | if (pgoff == 0) | ||
3435 | return virt_to_page(buffer->user_page); | ||
3436 | |||
3437 | return virt_to_page(buffer->data_pages[pgoff - 1]); | ||
3438 | } | ||
3439 | |||
3440 | static void *perf_mmap_alloc_page(int cpu) | ||
3441 | { | ||
3442 | struct page *page; | ||
3443 | int node; | ||
3444 | |||
3445 | node = (cpu == -1) ? cpu : cpu_to_node(cpu); | ||
3446 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | ||
3447 | if (!page) | ||
3448 | return NULL; | ||
3449 | |||
3450 | return page_address(page); | ||
3451 | } | ||
3452 | |||
3453 | static struct perf_buffer * | ||
3454 | perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
3455 | { | ||
3456 | struct perf_buffer *buffer; | ||
3457 | unsigned long size; | ||
3458 | int i; | ||
3459 | |||
3460 | size = sizeof(struct perf_buffer); | ||
3461 | size += nr_pages * sizeof(void *); | ||
3462 | |||
3463 | buffer = kzalloc(size, GFP_KERNEL); | ||
3464 | if (!buffer) | ||
3465 | goto fail; | ||
3466 | |||
3467 | buffer->user_page = perf_mmap_alloc_page(cpu); | ||
3468 | if (!buffer->user_page) | ||
3469 | goto fail_user_page; | ||
3470 | |||
3471 | for (i = 0; i < nr_pages; i++) { | ||
3472 | buffer->data_pages[i] = perf_mmap_alloc_page(cpu); | ||
3473 | if (!buffer->data_pages[i]) | ||
3474 | goto fail_data_pages; | ||
3475 | } | ||
3476 | |||
3477 | buffer->nr_pages = nr_pages; | ||
3478 | |||
3479 | perf_buffer_init(buffer, watermark, flags); | ||
3480 | |||
3481 | return buffer; | ||
3482 | |||
3483 | fail_data_pages: | ||
3484 | for (i--; i >= 0; i--) | ||
3485 | free_page((unsigned long)buffer->data_pages[i]); | ||
3486 | |||
3487 | free_page((unsigned long)buffer->user_page); | ||
3488 | |||
3489 | fail_user_page: | ||
3490 | kfree(buffer); | ||
3491 | |||
3492 | fail: | ||
3493 | return NULL; | ||
3494 | } | ||
3495 | |||
3496 | static void perf_mmap_free_page(unsigned long addr) | ||
3497 | { | ||
3498 | struct page *page = virt_to_page((void *)addr); | ||
3499 | |||
3500 | page->mapping = NULL; | ||
3501 | __free_page(page); | ||
3502 | } | ||
3503 | |||
3504 | static void perf_buffer_free(struct perf_buffer *buffer) | ||
3505 | { | ||
3506 | int i; | ||
3507 | |||
3508 | perf_mmap_free_page((unsigned long)buffer->user_page); | ||
3509 | for (i = 0; i < buffer->nr_pages; i++) | ||
3510 | perf_mmap_free_page((unsigned long)buffer->data_pages[i]); | ||
3511 | kfree(buffer); | ||
3512 | } | ||
3513 | |||
3514 | static inline int page_order(struct perf_buffer *buffer) | ||
3515 | { | ||
3516 | return 0; | ||
3517 | } | ||
3518 | |||
3519 | #else | ||
3520 | |||
3521 | /* | ||
3522 | * Back perf_mmap() with vmalloc memory. | ||
3523 | * | ||
3524 | * Required for architectures that have d-cache aliasing issues. | ||
3525 | */ | ||
3526 | |||
3527 | static inline int page_order(struct perf_buffer *buffer) | ||
3528 | { | ||
3529 | return buffer->page_order; | ||
3530 | } | ||
3531 | |||
3532 | static struct page * | ||
3533 | perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) | ||
3534 | { | ||
3535 | if (pgoff > (1UL << page_order(buffer))) | ||
3536 | return NULL; | ||
3537 | |||
3538 | return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE); | ||
3539 | } | ||
3540 | |||
3541 | static void perf_mmap_unmark_page(void *addr) | ||
3542 | { | ||
3543 | struct page *page = vmalloc_to_page(addr); | ||
3544 | |||
3545 | page->mapping = NULL; | ||
3546 | } | ||
3547 | |||
3548 | static void perf_buffer_free_work(struct work_struct *work) | ||
3549 | { | ||
3550 | struct perf_buffer *buffer; | ||
3551 | void *base; | ||
3552 | int i, nr; | ||
3553 | |||
3554 | buffer = container_of(work, struct perf_buffer, work); | ||
3555 | nr = 1 << page_order(buffer); | ||
3556 | |||
3557 | base = buffer->user_page; | ||
3558 | for (i = 0; i < nr + 1; i++) | ||
3559 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
3560 | |||
3561 | vfree(base); | ||
3562 | kfree(buffer); | ||
3563 | } | ||
3564 | |||
3565 | static void perf_buffer_free(struct perf_buffer *buffer) | ||
3566 | { | ||
3567 | schedule_work(&buffer->work); | ||
3568 | } | ||
3569 | |||
3570 | static struct perf_buffer * | ||
3571 | perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
3572 | { | ||
3573 | struct perf_buffer *buffer; | ||
3574 | unsigned long size; | ||
3575 | void *all_buf; | ||
3576 | |||
3577 | size = sizeof(struct perf_buffer); | ||
3578 | size += sizeof(void *); | ||
3579 | |||
3580 | buffer = kzalloc(size, GFP_KERNEL); | ||
3581 | if (!buffer) | ||
3582 | goto fail; | ||
3583 | |||
3584 | INIT_WORK(&buffer->work, perf_buffer_free_work); | ||
3585 | |||
3586 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
3587 | if (!all_buf) | ||
3588 | goto fail_all_buf; | ||
3589 | |||
3590 | buffer->user_page = all_buf; | ||
3591 | buffer->data_pages[0] = all_buf + PAGE_SIZE; | ||
3592 | buffer->page_order = ilog2(nr_pages); | ||
3593 | buffer->nr_pages = 1; | ||
3594 | |||
3595 | perf_buffer_init(buffer, watermark, flags); | ||
3596 | |||
3597 | return buffer; | ||
3598 | |||
3599 | fail_all_buf: | ||
3600 | kfree(buffer); | ||
3601 | |||
3602 | fail: | ||
3603 | return NULL; | ||
3604 | } | ||
3605 | |||
3606 | #endif | ||
3607 | |||
3608 | static unsigned long perf_data_size(struct perf_buffer *buffer) | ||
3609 | { | ||
3610 | return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer)); | ||
3611 | } | ||
3612 | |||
3613 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 3420 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
3614 | { | 3421 | { |
3615 | struct perf_event *event = vma->vm_file->private_data; | 3422 | struct perf_event *event = vma->vm_file->private_data; |
3616 | struct perf_buffer *buffer; | 3423 | struct ring_buffer *rb; |
3617 | int ret = VM_FAULT_SIGBUS; | 3424 | int ret = VM_FAULT_SIGBUS; |
3618 | 3425 | ||
3619 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | 3426 | if (vmf->flags & FAULT_FLAG_MKWRITE) { |
@@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
3623 | } | 3430 | } |
3624 | 3431 | ||
3625 | rcu_read_lock(); | 3432 | rcu_read_lock(); |
3626 | buffer = rcu_dereference(event->buffer); | 3433 | rb = rcu_dereference(event->rb); |
3627 | if (!buffer) | 3434 | if (!rb) |
3628 | goto unlock; | 3435 | goto unlock; |
3629 | 3436 | ||
3630 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) | 3437 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) |
3631 | goto unlock; | 3438 | goto unlock; |
3632 | 3439 | ||
3633 | vmf->page = perf_mmap_to_page(buffer, vmf->pgoff); | 3440 | vmf->page = perf_mmap_to_page(rb, vmf->pgoff); |
3634 | if (!vmf->page) | 3441 | if (!vmf->page) |
3635 | goto unlock; | 3442 | goto unlock; |
3636 | 3443 | ||
@@ -3645,35 +3452,35 @@ unlock: | |||
3645 | return ret; | 3452 | return ret; |
3646 | } | 3453 | } |
3647 | 3454 | ||
3648 | static void perf_buffer_free_rcu(struct rcu_head *rcu_head) | 3455 | static void rb_free_rcu(struct rcu_head *rcu_head) |
3649 | { | 3456 | { |
3650 | struct perf_buffer *buffer; | 3457 | struct ring_buffer *rb; |
3651 | 3458 | ||
3652 | buffer = container_of(rcu_head, struct perf_buffer, rcu_head); | 3459 | rb = container_of(rcu_head, struct ring_buffer, rcu_head); |
3653 | perf_buffer_free(buffer); | 3460 | rb_free(rb); |
3654 | } | 3461 | } |
3655 | 3462 | ||
3656 | static struct perf_buffer *perf_buffer_get(struct perf_event *event) | 3463 | static struct ring_buffer *ring_buffer_get(struct perf_event *event) |
3657 | { | 3464 | { |
3658 | struct perf_buffer *buffer; | 3465 | struct ring_buffer *rb; |
3659 | 3466 | ||
3660 | rcu_read_lock(); | 3467 | rcu_read_lock(); |
3661 | buffer = rcu_dereference(event->buffer); | 3468 | rb = rcu_dereference(event->rb); |
3662 | if (buffer) { | 3469 | if (rb) { |
3663 | if (!atomic_inc_not_zero(&buffer->refcount)) | 3470 | if (!atomic_inc_not_zero(&rb->refcount)) |
3664 | buffer = NULL; | 3471 | rb = NULL; |
3665 | } | 3472 | } |
3666 | rcu_read_unlock(); | 3473 | rcu_read_unlock(); |
3667 | 3474 | ||
3668 | return buffer; | 3475 | return rb; |
3669 | } | 3476 | } |
3670 | 3477 | ||
3671 | static void perf_buffer_put(struct perf_buffer *buffer) | 3478 | static void ring_buffer_put(struct ring_buffer *rb) |
3672 | { | 3479 | { |
3673 | if (!atomic_dec_and_test(&buffer->refcount)) | 3480 | if (!atomic_dec_and_test(&rb->refcount)) |
3674 | return; | 3481 | return; |
3675 | 3482 | ||
3676 | call_rcu(&buffer->rcu_head, perf_buffer_free_rcu); | 3483 | call_rcu(&rb->rcu_head, rb_free_rcu); |
3677 | } | 3484 | } |
3678 | 3485 | ||
3679 | static void perf_mmap_open(struct vm_area_struct *vma) | 3486 | static void perf_mmap_open(struct vm_area_struct *vma) |
@@ -3688,16 +3495,16 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
3688 | struct perf_event *event = vma->vm_file->private_data; | 3495 | struct perf_event *event = vma->vm_file->private_data; |
3689 | 3496 | ||
3690 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 3497 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
3691 | unsigned long size = perf_data_size(event->buffer); | 3498 | unsigned long size = perf_data_size(event->rb); |
3692 | struct user_struct *user = event->mmap_user; | 3499 | struct user_struct *user = event->mmap_user; |
3693 | struct perf_buffer *buffer = event->buffer; | 3500 | struct ring_buffer *rb = event->rb; |
3694 | 3501 | ||
3695 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); | 3502 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
3696 | vma->vm_mm->locked_vm -= event->mmap_locked; | 3503 | vma->vm_mm->locked_vm -= event->mmap_locked; |
3697 | rcu_assign_pointer(event->buffer, NULL); | 3504 | rcu_assign_pointer(event->rb, NULL); |
3698 | mutex_unlock(&event->mmap_mutex); | 3505 | mutex_unlock(&event->mmap_mutex); |
3699 | 3506 | ||
3700 | perf_buffer_put(buffer); | 3507 | ring_buffer_put(rb); |
3701 | free_uid(user); | 3508 | free_uid(user); |
3702 | } | 3509 | } |
3703 | } | 3510 | } |
@@ -3715,7 +3522,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3715 | unsigned long user_locked, user_lock_limit; | 3522 | unsigned long user_locked, user_lock_limit; |
3716 | struct user_struct *user = current_user(); | 3523 | struct user_struct *user = current_user(); |
3717 | unsigned long locked, lock_limit; | 3524 | unsigned long locked, lock_limit; |
3718 | struct perf_buffer *buffer; | 3525 | struct ring_buffer *rb; |
3719 | unsigned long vma_size; | 3526 | unsigned long vma_size; |
3720 | unsigned long nr_pages; | 3527 | unsigned long nr_pages; |
3721 | long user_extra, extra; | 3528 | long user_extra, extra; |
@@ -3724,7 +3531,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3724 | /* | 3531 | /* |
3725 | * Don't allow mmap() of inherited per-task counters. This would | 3532 | * Don't allow mmap() of inherited per-task counters. This would |
3726 | * create a performance issue due to all children writing to the | 3533 | * create a performance issue due to all children writing to the |
3727 | * same buffer. | 3534 | * same rb. |
3728 | */ | 3535 | */ |
3729 | if (event->cpu == -1 && event->attr.inherit) | 3536 | if (event->cpu == -1 && event->attr.inherit) |
3730 | return -EINVAL; | 3537 | return -EINVAL; |
@@ -3736,7 +3543,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3736 | nr_pages = (vma_size / PAGE_SIZE) - 1; | 3543 | nr_pages = (vma_size / PAGE_SIZE) - 1; |
3737 | 3544 | ||
3738 | /* | 3545 | /* |
3739 | * If we have buffer pages ensure they're a power-of-two number, so we | 3546 | * If we have rb pages ensure they're a power-of-two number, so we |
3740 | * can do bitmasks instead of modulo. | 3547 | * can do bitmasks instead of modulo. |
3741 | */ | 3548 | */ |
3742 | if (nr_pages != 0 && !is_power_of_2(nr_pages)) | 3549 | if (nr_pages != 0 && !is_power_of_2(nr_pages)) |
@@ -3750,9 +3557,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3750 | 3557 | ||
3751 | WARN_ON_ONCE(event->ctx->parent_ctx); | 3558 | WARN_ON_ONCE(event->ctx->parent_ctx); |
3752 | mutex_lock(&event->mmap_mutex); | 3559 | mutex_lock(&event->mmap_mutex); |
3753 | if (event->buffer) { | 3560 | if (event->rb) { |
3754 | if (event->buffer->nr_pages == nr_pages) | 3561 | if (event->rb->nr_pages == nr_pages) |
3755 | atomic_inc(&event->buffer->refcount); | 3562 | atomic_inc(&event->rb->refcount); |
3756 | else | 3563 | else |
3757 | ret = -EINVAL; | 3564 | ret = -EINVAL; |
3758 | goto unlock; | 3565 | goto unlock; |
@@ -3782,18 +3589,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3782 | goto unlock; | 3589 | goto unlock; |
3783 | } | 3590 | } |
3784 | 3591 | ||
3785 | WARN_ON(event->buffer); | 3592 | WARN_ON(event->rb); |
3786 | 3593 | ||
3787 | if (vma->vm_flags & VM_WRITE) | 3594 | if (vma->vm_flags & VM_WRITE) |
3788 | flags |= PERF_BUFFER_WRITABLE; | 3595 | flags |= RING_BUFFER_WRITABLE; |
3789 | 3596 | ||
3790 | buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark, | 3597 | rb = rb_alloc(nr_pages, |
3791 | event->cpu, flags); | 3598 | event->attr.watermark ? event->attr.wakeup_watermark : 0, |
3792 | if (!buffer) { | 3599 | event->cpu, flags); |
3600 | |||
3601 | if (!rb) { | ||
3793 | ret = -ENOMEM; | 3602 | ret = -ENOMEM; |
3794 | goto unlock; | 3603 | goto unlock; |
3795 | } | 3604 | } |
3796 | rcu_assign_pointer(event->buffer, buffer); | 3605 | rcu_assign_pointer(event->rb, rb); |
3797 | 3606 | ||
3798 | atomic_long_add(user_extra, &user->locked_vm); | 3607 | atomic_long_add(user_extra, &user->locked_vm); |
3799 | event->mmap_locked = extra; | 3608 | event->mmap_locked = extra; |
@@ -3892,117 +3701,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) | |||
3892 | } | 3701 | } |
3893 | EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); | 3702 | EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); |
3894 | 3703 | ||
3895 | /* | ||
3896 | * Output | ||
3897 | */ | ||
3898 | static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail, | ||
3899 | unsigned long offset, unsigned long head) | ||
3900 | { | ||
3901 | unsigned long mask; | ||
3902 | |||
3903 | if (!buffer->writable) | ||
3904 | return true; | ||
3905 | |||
3906 | mask = perf_data_size(buffer) - 1; | ||
3907 | |||
3908 | offset = (offset - tail) & mask; | ||
3909 | head = (head - tail) & mask; | ||
3910 | |||
3911 | if ((int)(head - offset) < 0) | ||
3912 | return false; | ||
3913 | |||
3914 | return true; | ||
3915 | } | ||
3916 | |||
3917 | static void perf_output_wakeup(struct perf_output_handle *handle) | ||
3918 | { | ||
3919 | atomic_set(&handle->buffer->poll, POLL_IN); | ||
3920 | |||
3921 | if (handle->nmi) { | ||
3922 | handle->event->pending_wakeup = 1; | ||
3923 | irq_work_queue(&handle->event->pending); | ||
3924 | } else | ||
3925 | perf_event_wakeup(handle->event); | ||
3926 | } | ||
3927 | |||
3928 | /* | ||
3929 | * We need to ensure a later event_id doesn't publish a head when a former | ||
3930 | * event isn't done writing. However since we need to deal with NMIs we | ||
3931 | * cannot fully serialize things. | ||
3932 | * | ||
3933 | * We only publish the head (and generate a wakeup) when the outer-most | ||
3934 | * event completes. | ||
3935 | */ | ||
3936 | static void perf_output_get_handle(struct perf_output_handle *handle) | ||
3937 | { | ||
3938 | struct perf_buffer *buffer = handle->buffer; | ||
3939 | |||
3940 | preempt_disable(); | ||
3941 | local_inc(&buffer->nest); | ||
3942 | handle->wakeup = local_read(&buffer->wakeup); | ||
3943 | } | ||
3944 | |||
3945 | static void perf_output_put_handle(struct perf_output_handle *handle) | ||
3946 | { | ||
3947 | struct perf_buffer *buffer = handle->buffer; | ||
3948 | unsigned long head; | ||
3949 | |||
3950 | again: | ||
3951 | head = local_read(&buffer->head); | ||
3952 | |||
3953 | /* | ||
3954 | * IRQ/NMI can happen here, which means we can miss a head update. | ||
3955 | */ | ||
3956 | |||
3957 | if (!local_dec_and_test(&buffer->nest)) | ||
3958 | goto out; | ||
3959 | |||
3960 | /* | ||
3961 | * Publish the known good head. Rely on the full barrier implied | ||
3962 | * by atomic_dec_and_test() order the buffer->head read and this | ||
3963 | * write. | ||
3964 | */ | ||
3965 | buffer->user_page->data_head = head; | ||
3966 | |||
3967 | /* | ||
3968 | * Now check if we missed an update, rely on the (compiler) | ||
3969 | * barrier in atomic_dec_and_test() to re-read buffer->head. | ||
3970 | */ | ||
3971 | if (unlikely(head != local_read(&buffer->head))) { | ||
3972 | local_inc(&buffer->nest); | ||
3973 | goto again; | ||
3974 | } | ||
3975 | |||
3976 | if (handle->wakeup != local_read(&buffer->wakeup)) | ||
3977 | perf_output_wakeup(handle); | ||
3978 | |||
3979 | out: | ||
3980 | preempt_enable(); | ||
3981 | } | ||
3982 | |||
3983 | __always_inline void perf_output_copy(struct perf_output_handle *handle, | ||
3984 | const void *buf, unsigned int len) | ||
3985 | { | ||
3986 | do { | ||
3987 | unsigned long size = min_t(unsigned long, handle->size, len); | ||
3988 | |||
3989 | memcpy(handle->addr, buf, size); | ||
3990 | |||
3991 | len -= size; | ||
3992 | handle->addr += size; | ||
3993 | buf += size; | ||
3994 | handle->size -= size; | ||
3995 | if (!handle->size) { | ||
3996 | struct perf_buffer *buffer = handle->buffer; | ||
3997 | |||
3998 | handle->page++; | ||
3999 | handle->page &= buffer->nr_pages - 1; | ||
4000 | handle->addr = buffer->data_pages[handle->page]; | ||
4001 | handle->size = PAGE_SIZE << page_order(buffer); | ||
4002 | } | ||
4003 | } while (len); | ||
4004 | } | ||
4005 | |||
4006 | static void __perf_event_header__init_id(struct perf_event_header *header, | 3704 | static void __perf_event_header__init_id(struct perf_event_header *header, |
4007 | struct perf_sample_data *data, | 3705 | struct perf_sample_data *data, |
4008 | struct perf_event *event) | 3706 | struct perf_event *event) |
@@ -4033,9 +3731,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header, | |||
4033 | } | 3731 | } |
4034 | } | 3732 | } |
4035 | 3733 | ||
4036 | static void perf_event_header__init_id(struct perf_event_header *header, | 3734 | void perf_event_header__init_id(struct perf_event_header *header, |
4037 | struct perf_sample_data *data, | 3735 | struct perf_sample_data *data, |
4038 | struct perf_event *event) | 3736 | struct perf_event *event) |
4039 | { | 3737 | { |
4040 | if (event->attr.sample_id_all) | 3738 | if (event->attr.sample_id_all) |
4041 | __perf_event_header__init_id(header, data, event); | 3739 | __perf_event_header__init_id(header, data, event); |
@@ -4062,121 +3760,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle, | |||
4062 | perf_output_put(handle, data->cpu_entry); | 3760 | perf_output_put(handle, data->cpu_entry); |
4063 | } | 3761 | } |
4064 | 3762 | ||
4065 | static void perf_event__output_id_sample(struct perf_event *event, | 3763 | void perf_event__output_id_sample(struct perf_event *event, |
4066 | struct perf_output_handle *handle, | 3764 | struct perf_output_handle *handle, |
4067 | struct perf_sample_data *sample) | 3765 | struct perf_sample_data *sample) |
4068 | { | 3766 | { |
4069 | if (event->attr.sample_id_all) | 3767 | if (event->attr.sample_id_all) |
4070 | __perf_event__output_id_sample(handle, sample); | 3768 | __perf_event__output_id_sample(handle, sample); |
4071 | } | 3769 | } |
4072 | 3770 | ||
4073 | int perf_output_begin(struct perf_output_handle *handle, | ||
4074 | struct perf_event *event, unsigned int size, | ||
4075 | int nmi, int sample) | ||
4076 | { | ||
4077 | struct perf_buffer *buffer; | ||
4078 | unsigned long tail, offset, head; | ||
4079 | int have_lost; | ||
4080 | struct perf_sample_data sample_data; | ||
4081 | struct { | ||
4082 | struct perf_event_header header; | ||
4083 | u64 id; | ||
4084 | u64 lost; | ||
4085 | } lost_event; | ||
4086 | |||
4087 | rcu_read_lock(); | ||
4088 | /* | ||
4089 | * For inherited events we send all the output towards the parent. | ||
4090 | */ | ||
4091 | if (event->parent) | ||
4092 | event = event->parent; | ||
4093 | |||
4094 | buffer = rcu_dereference(event->buffer); | ||
4095 | if (!buffer) | ||
4096 | goto out; | ||
4097 | |||
4098 | handle->buffer = buffer; | ||
4099 | handle->event = event; | ||
4100 | handle->nmi = nmi; | ||
4101 | handle->sample = sample; | ||
4102 | |||
4103 | if (!buffer->nr_pages) | ||
4104 | goto out; | ||
4105 | |||
4106 | have_lost = local_read(&buffer->lost); | ||
4107 | if (have_lost) { | ||
4108 | lost_event.header.size = sizeof(lost_event); | ||
4109 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
4110 | event); | ||
4111 | size += lost_event.header.size; | ||
4112 | } | ||
4113 | |||
4114 | perf_output_get_handle(handle); | ||
4115 | |||
4116 | do { | ||
4117 | /* | ||
4118 | * Userspace could choose to issue a mb() before updating the | ||
4119 | * tail pointer, so that all reads will be completed before the | ||
4120 | * write is issued. | ||
4121 | */ | ||
4122 | tail = ACCESS_ONCE(buffer->user_page->data_tail); | ||
4123 | smp_rmb(); | ||
4124 | offset = head = local_read(&buffer->head); | ||
4125 | head += size; | ||
4126 | if (unlikely(!perf_output_space(buffer, tail, offset, head))) | ||
4127 | goto fail; | ||
4128 | } while (local_cmpxchg(&buffer->head, offset, head) != offset); | ||
4129 | |||
4130 | if (head - local_read(&buffer->wakeup) > buffer->watermark) | ||
4131 | local_add(buffer->watermark, &buffer->wakeup); | ||
4132 | |||
4133 | handle->page = offset >> (PAGE_SHIFT + page_order(buffer)); | ||
4134 | handle->page &= buffer->nr_pages - 1; | ||
4135 | handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1); | ||
4136 | handle->addr = buffer->data_pages[handle->page]; | ||
4137 | handle->addr += handle->size; | ||
4138 | handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size; | ||
4139 | |||
4140 | if (have_lost) { | ||
4141 | lost_event.header.type = PERF_RECORD_LOST; | ||
4142 | lost_event.header.misc = 0; | ||
4143 | lost_event.id = event->id; | ||
4144 | lost_event.lost = local_xchg(&buffer->lost, 0); | ||
4145 | |||
4146 | perf_output_put(handle, lost_event); | ||
4147 | perf_event__output_id_sample(event, handle, &sample_data); | ||
4148 | } | ||
4149 | |||
4150 | return 0; | ||
4151 | |||
4152 | fail: | ||
4153 | local_inc(&buffer->lost); | ||
4154 | perf_output_put_handle(handle); | ||
4155 | out: | ||
4156 | rcu_read_unlock(); | ||
4157 | |||
4158 | return -ENOSPC; | ||
4159 | } | ||
4160 | |||
4161 | void perf_output_end(struct perf_output_handle *handle) | ||
4162 | { | ||
4163 | struct perf_event *event = handle->event; | ||
4164 | struct perf_buffer *buffer = handle->buffer; | ||
4165 | |||
4166 | int wakeup_events = event->attr.wakeup_events; | ||
4167 | |||
4168 | if (handle->sample && wakeup_events) { | ||
4169 | int events = local_inc_return(&buffer->events); | ||
4170 | if (events >= wakeup_events) { | ||
4171 | local_sub(wakeup_events, &buffer->events); | ||
4172 | local_inc(&buffer->wakeup); | ||
4173 | } | ||
4174 | } | ||
4175 | |||
4176 | perf_output_put_handle(handle); | ||
4177 | rcu_read_unlock(); | ||
4178 | } | ||
4179 | |||
4180 | static void perf_output_read_one(struct perf_output_handle *handle, | 3771 | static void perf_output_read_one(struct perf_output_handle *handle, |
4181 | struct perf_event *event, | 3772 | struct perf_event *event, |
4182 | u64 enabled, u64 running) | 3773 | u64 enabled, u64 running) |
@@ -4197,7 +3788,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
4197 | if (read_format & PERF_FORMAT_ID) | 3788 | if (read_format & PERF_FORMAT_ID) |
4198 | values[n++] = primary_event_id(event); | 3789 | values[n++] = primary_event_id(event); |
4199 | 3790 | ||
4200 | perf_output_copy(handle, values, n * sizeof(u64)); | 3791 | __output_copy(handle, values, n * sizeof(u64)); |
4201 | } | 3792 | } |
4202 | 3793 | ||
4203 | /* | 3794 | /* |
@@ -4227,7 +3818,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
4227 | if (read_format & PERF_FORMAT_ID) | 3818 | if (read_format & PERF_FORMAT_ID) |
4228 | values[n++] = primary_event_id(leader); | 3819 | values[n++] = primary_event_id(leader); |
4229 | 3820 | ||
4230 | perf_output_copy(handle, values, n * sizeof(u64)); | 3821 | __output_copy(handle, values, n * sizeof(u64)); |
4231 | 3822 | ||
4232 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 3823 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
4233 | n = 0; | 3824 | n = 0; |
@@ -4239,7 +3830,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
4239 | if (read_format & PERF_FORMAT_ID) | 3830 | if (read_format & PERF_FORMAT_ID) |
4240 | values[n++] = primary_event_id(sub); | 3831 | values[n++] = primary_event_id(sub); |
4241 | 3832 | ||
4242 | perf_output_copy(handle, values, n * sizeof(u64)); | 3833 | __output_copy(handle, values, n * sizeof(u64)); |
4243 | } | 3834 | } |
4244 | } | 3835 | } |
4245 | 3836 | ||
@@ -4249,7 +3840,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
4249 | static void perf_output_read(struct perf_output_handle *handle, | 3840 | static void perf_output_read(struct perf_output_handle *handle, |
4250 | struct perf_event *event) | 3841 | struct perf_event *event) |
4251 | { | 3842 | { |
4252 | u64 enabled = 0, running = 0, now, ctx_time; | 3843 | u64 enabled = 0, running = 0; |
4253 | u64 read_format = event->attr.read_format; | 3844 | u64 read_format = event->attr.read_format; |
4254 | 3845 | ||
4255 | /* | 3846 | /* |
@@ -4261,12 +3852,8 @@ static void perf_output_read(struct perf_output_handle *handle, | |||
4261 | * because of locking issue as we are called in | 3852 | * because of locking issue as we are called in |
4262 | * NMI context | 3853 | * NMI context |
4263 | */ | 3854 | */ |
4264 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | 3855 | if (read_format & PERF_FORMAT_TOTAL_TIMES) |
4265 | now = perf_clock(); | 3856 | calc_timer_values(event, &enabled, &running); |
4266 | ctx_time = event->shadow_ctx_time + now; | ||
4267 | enabled = ctx_time - event->tstamp_enabled; | ||
4268 | running = ctx_time - event->tstamp_running; | ||
4269 | } | ||
4270 | 3857 | ||
4271 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3858 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
4272 | perf_output_read_group(handle, event, enabled, running); | 3859 | perf_output_read_group(handle, event, enabled, running); |
@@ -4319,7 +3906,7 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4319 | 3906 | ||
4320 | size *= sizeof(u64); | 3907 | size *= sizeof(u64); |
4321 | 3908 | ||
4322 | perf_output_copy(handle, data->callchain, size); | 3909 | __output_copy(handle, data->callchain, size); |
4323 | } else { | 3910 | } else { |
4324 | u64 nr = 0; | 3911 | u64 nr = 0; |
4325 | perf_output_put(handle, nr); | 3912 | perf_output_put(handle, nr); |
@@ -4329,8 +3916,8 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4329 | if (sample_type & PERF_SAMPLE_RAW) { | 3916 | if (sample_type & PERF_SAMPLE_RAW) { |
4330 | if (data->raw) { | 3917 | if (data->raw) { |
4331 | perf_output_put(handle, data->raw->size); | 3918 | perf_output_put(handle, data->raw->size); |
4332 | perf_output_copy(handle, data->raw->data, | 3919 | __output_copy(handle, data->raw->data, |
4333 | data->raw->size); | 3920 | data->raw->size); |
4334 | } else { | 3921 | } else { |
4335 | struct { | 3922 | struct { |
4336 | u32 size; | 3923 | u32 size; |
@@ -4342,6 +3929,20 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4342 | perf_output_put(handle, raw); | 3929 | perf_output_put(handle, raw); |
4343 | } | 3930 | } |
4344 | } | 3931 | } |
3932 | |||
3933 | if (!event->attr.watermark) { | ||
3934 | int wakeup_events = event->attr.wakeup_events; | ||
3935 | |||
3936 | if (wakeup_events) { | ||
3937 | struct ring_buffer *rb = handle->rb; | ||
3938 | int events = local_inc_return(&rb->events); | ||
3939 | |||
3940 | if (events >= wakeup_events) { | ||
3941 | local_sub(wakeup_events, &rb->events); | ||
3942 | local_inc(&rb->wakeup); | ||
3943 | } | ||
3944 | } | ||
3945 | } | ||
4345 | } | 3946 | } |
4346 | 3947 | ||
4347 | void perf_prepare_sample(struct perf_event_header *header, | 3948 | void perf_prepare_sample(struct perf_event_header *header, |
@@ -4386,7 +3987,7 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4386 | } | 3987 | } |
4387 | } | 3988 | } |
4388 | 3989 | ||
4389 | static void perf_event_output(struct perf_event *event, int nmi, | 3990 | static void perf_event_output(struct perf_event *event, |
4390 | struct perf_sample_data *data, | 3991 | struct perf_sample_data *data, |
4391 | struct pt_regs *regs) | 3992 | struct pt_regs *regs) |
4392 | { | 3993 | { |
@@ -4398,7 +3999,7 @@ static void perf_event_output(struct perf_event *event, int nmi, | |||
4398 | 3999 | ||
4399 | perf_prepare_sample(&header, data, event, regs); | 4000 | perf_prepare_sample(&header, data, event, regs); |
4400 | 4001 | ||
4401 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) | 4002 | if (perf_output_begin(&handle, event, header.size)) |
4402 | goto exit; | 4003 | goto exit; |
4403 | 4004 | ||
4404 | perf_output_sample(&handle, &header, data, event); | 4005 | perf_output_sample(&handle, &header, data, event); |
@@ -4438,7 +4039,7 @@ perf_event_read_event(struct perf_event *event, | |||
4438 | int ret; | 4039 | int ret; |
4439 | 4040 | ||
4440 | perf_event_header__init_id(&read_event.header, &sample, event); | 4041 | perf_event_header__init_id(&read_event.header, &sample, event); |
4441 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); | 4042 | ret = perf_output_begin(&handle, event, read_event.header.size); |
4442 | if (ret) | 4043 | if (ret) |
4443 | return; | 4044 | return; |
4444 | 4045 | ||
@@ -4481,7 +4082,7 @@ static void perf_event_task_output(struct perf_event *event, | |||
4481 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); | 4082 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); |
4482 | 4083 | ||
4483 | ret = perf_output_begin(&handle, event, | 4084 | ret = perf_output_begin(&handle, event, |
4484 | task_event->event_id.header.size, 0, 0); | 4085 | task_event->event_id.header.size); |
4485 | if (ret) | 4086 | if (ret) |
4486 | goto out; | 4087 | goto out; |
4487 | 4088 | ||
@@ -4618,7 +4219,7 @@ static void perf_event_comm_output(struct perf_event *event, | |||
4618 | 4219 | ||
4619 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); | 4220 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); |
4620 | ret = perf_output_begin(&handle, event, | 4221 | ret = perf_output_begin(&handle, event, |
4621 | comm_event->event_id.header.size, 0, 0); | 4222 | comm_event->event_id.header.size); |
4622 | 4223 | ||
4623 | if (ret) | 4224 | if (ret) |
4624 | goto out; | 4225 | goto out; |
@@ -4627,7 +4228,7 @@ static void perf_event_comm_output(struct perf_event *event, | |||
4627 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); | 4228 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
4628 | 4229 | ||
4629 | perf_output_put(&handle, comm_event->event_id); | 4230 | perf_output_put(&handle, comm_event->event_id); |
4630 | perf_output_copy(&handle, comm_event->comm, | 4231 | __output_copy(&handle, comm_event->comm, |
4631 | comm_event->comm_size); | 4232 | comm_event->comm_size); |
4632 | 4233 | ||
4633 | perf_event__output_id_sample(event, &handle, &sample); | 4234 | perf_event__output_id_sample(event, &handle, &sample); |
@@ -4765,7 +4366,7 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
4765 | 4366 | ||
4766 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | 4367 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); |
4767 | ret = perf_output_begin(&handle, event, | 4368 | ret = perf_output_begin(&handle, event, |
4768 | mmap_event->event_id.header.size, 0, 0); | 4369 | mmap_event->event_id.header.size); |
4769 | if (ret) | 4370 | if (ret) |
4770 | goto out; | 4371 | goto out; |
4771 | 4372 | ||
@@ -4773,7 +4374,7 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
4773 | mmap_event->event_id.tid = perf_event_tid(event, current); | 4374 | mmap_event->event_id.tid = perf_event_tid(event, current); |
4774 | 4375 | ||
4775 | perf_output_put(&handle, mmap_event->event_id); | 4376 | perf_output_put(&handle, mmap_event->event_id); |
4776 | perf_output_copy(&handle, mmap_event->file_name, | 4377 | __output_copy(&handle, mmap_event->file_name, |
4777 | mmap_event->file_size); | 4378 | mmap_event->file_size); |
4778 | 4379 | ||
4779 | perf_event__output_id_sample(event, &handle, &sample); | 4380 | perf_event__output_id_sample(event, &handle, &sample); |
@@ -4829,7 +4430,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
4829 | 4430 | ||
4830 | if (file) { | 4431 | if (file) { |
4831 | /* | 4432 | /* |
4832 | * d_path works from the end of the buffer backwards, so we | 4433 | * d_path works from the end of the rb backwards, so we |
4833 | * need to add enough zero bytes after the string to handle | 4434 | * need to add enough zero bytes after the string to handle |
4834 | * the 64bit alignment we do later. | 4435 | * the 64bit alignment we do later. |
4835 | */ | 4436 | */ |
@@ -4960,7 +4561,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
4960 | perf_event_header__init_id(&throttle_event.header, &sample, event); | 4561 | perf_event_header__init_id(&throttle_event.header, &sample, event); |
4961 | 4562 | ||
4962 | ret = perf_output_begin(&handle, event, | 4563 | ret = perf_output_begin(&handle, event, |
4963 | throttle_event.header.size, 1, 0); | 4564 | throttle_event.header.size); |
4964 | if (ret) | 4565 | if (ret) |
4965 | return; | 4566 | return; |
4966 | 4567 | ||
@@ -4973,7 +4574,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
4973 | * Generic event overflow handling, sampling. | 4574 | * Generic event overflow handling, sampling. |
4974 | */ | 4575 | */ |
4975 | 4576 | ||
4976 | static int __perf_event_overflow(struct perf_event *event, int nmi, | 4577 | static int __perf_event_overflow(struct perf_event *event, |
4977 | int throttle, struct perf_sample_data *data, | 4578 | int throttle, struct perf_sample_data *data, |
4978 | struct pt_regs *regs) | 4579 | struct pt_regs *regs) |
4979 | { | 4580 | { |
@@ -5016,34 +4617,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
5016 | if (events && atomic_dec_and_test(&event->event_limit)) { | 4617 | if (events && atomic_dec_and_test(&event->event_limit)) { |
5017 | ret = 1; | 4618 | ret = 1; |
5018 | event->pending_kill = POLL_HUP; | 4619 | event->pending_kill = POLL_HUP; |
5019 | if (nmi) { | 4620 | event->pending_disable = 1; |
5020 | event->pending_disable = 1; | 4621 | irq_work_queue(&event->pending); |
5021 | irq_work_queue(&event->pending); | ||
5022 | } else | ||
5023 | perf_event_disable(event); | ||
5024 | } | 4622 | } |
5025 | 4623 | ||
5026 | if (event->overflow_handler) | 4624 | if (event->overflow_handler) |
5027 | event->overflow_handler(event, nmi, data, regs); | 4625 | event->overflow_handler(event, data, regs); |
5028 | else | 4626 | else |
5029 | perf_event_output(event, nmi, data, regs); | 4627 | perf_event_output(event, data, regs); |
5030 | 4628 | ||
5031 | if (event->fasync && event->pending_kill) { | 4629 | if (event->fasync && event->pending_kill) { |
5032 | if (nmi) { | 4630 | event->pending_wakeup = 1; |
5033 | event->pending_wakeup = 1; | 4631 | irq_work_queue(&event->pending); |
5034 | irq_work_queue(&event->pending); | ||
5035 | } else | ||
5036 | perf_event_wakeup(event); | ||
5037 | } | 4632 | } |
5038 | 4633 | ||
5039 | return ret; | 4634 | return ret; |
5040 | } | 4635 | } |
5041 | 4636 | ||
5042 | int perf_event_overflow(struct perf_event *event, int nmi, | 4637 | int perf_event_overflow(struct perf_event *event, |
5043 | struct perf_sample_data *data, | 4638 | struct perf_sample_data *data, |
5044 | struct pt_regs *regs) | 4639 | struct pt_regs *regs) |
5045 | { | 4640 | { |
5046 | return __perf_event_overflow(event, nmi, 1, data, regs); | 4641 | return __perf_event_overflow(event, 1, data, regs); |
5047 | } | 4642 | } |
5048 | 4643 | ||
5049 | /* | 4644 | /* |
@@ -5092,7 +4687,7 @@ again: | |||
5092 | } | 4687 | } |
5093 | 4688 | ||
5094 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | 4689 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, |
5095 | int nmi, struct perf_sample_data *data, | 4690 | struct perf_sample_data *data, |
5096 | struct pt_regs *regs) | 4691 | struct pt_regs *regs) |
5097 | { | 4692 | { |
5098 | struct hw_perf_event *hwc = &event->hw; | 4693 | struct hw_perf_event *hwc = &event->hw; |
@@ -5106,7 +4701,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
5106 | return; | 4701 | return; |
5107 | 4702 | ||
5108 | for (; overflow; overflow--) { | 4703 | for (; overflow; overflow--) { |
5109 | if (__perf_event_overflow(event, nmi, throttle, | 4704 | if (__perf_event_overflow(event, throttle, |
5110 | data, regs)) { | 4705 | data, regs)) { |
5111 | /* | 4706 | /* |
5112 | * We inhibit the overflow from happening when | 4707 | * We inhibit the overflow from happening when |
@@ -5119,7 +4714,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
5119 | } | 4714 | } |
5120 | 4715 | ||
5121 | static void perf_swevent_event(struct perf_event *event, u64 nr, | 4716 | static void perf_swevent_event(struct perf_event *event, u64 nr, |
5122 | int nmi, struct perf_sample_data *data, | 4717 | struct perf_sample_data *data, |
5123 | struct pt_regs *regs) | 4718 | struct pt_regs *regs) |
5124 | { | 4719 | { |
5125 | struct hw_perf_event *hwc = &event->hw; | 4720 | struct hw_perf_event *hwc = &event->hw; |
@@ -5133,12 +4728,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, | |||
5133 | return; | 4728 | return; |
5134 | 4729 | ||
5135 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) | 4730 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
5136 | return perf_swevent_overflow(event, 1, nmi, data, regs); | 4731 | return perf_swevent_overflow(event, 1, data, regs); |
5137 | 4732 | ||
5138 | if (local64_add_negative(nr, &hwc->period_left)) | 4733 | if (local64_add_negative(nr, &hwc->period_left)) |
5139 | return; | 4734 | return; |
5140 | 4735 | ||
5141 | perf_swevent_overflow(event, 0, nmi, data, regs); | 4736 | perf_swevent_overflow(event, 0, data, regs); |
5142 | } | 4737 | } |
5143 | 4738 | ||
5144 | static int perf_exclude_event(struct perf_event *event, | 4739 | static int perf_exclude_event(struct perf_event *event, |
@@ -5226,7 +4821,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) | |||
5226 | } | 4821 | } |
5227 | 4822 | ||
5228 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 4823 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
5229 | u64 nr, int nmi, | 4824 | u64 nr, |
5230 | struct perf_sample_data *data, | 4825 | struct perf_sample_data *data, |
5231 | struct pt_regs *regs) | 4826 | struct pt_regs *regs) |
5232 | { | 4827 | { |
@@ -5242,7 +4837,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
5242 | 4837 | ||
5243 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4838 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
5244 | if (perf_swevent_match(event, type, event_id, data, regs)) | 4839 | if (perf_swevent_match(event, type, event_id, data, regs)) |
5245 | perf_swevent_event(event, nr, nmi, data, regs); | 4840 | perf_swevent_event(event, nr, data, regs); |
5246 | } | 4841 | } |
5247 | end: | 4842 | end: |
5248 | rcu_read_unlock(); | 4843 | rcu_read_unlock(); |
@@ -5263,8 +4858,7 @@ inline void perf_swevent_put_recursion_context(int rctx) | |||
5263 | put_recursion_context(swhash->recursion, rctx); | 4858 | put_recursion_context(swhash->recursion, rctx); |
5264 | } | 4859 | } |
5265 | 4860 | ||
5266 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 4861 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
5267 | struct pt_regs *regs, u64 addr) | ||
5268 | { | 4862 | { |
5269 | struct perf_sample_data data; | 4863 | struct perf_sample_data data; |
5270 | int rctx; | 4864 | int rctx; |
@@ -5276,7 +4870,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, | |||
5276 | 4870 | ||
5277 | perf_sample_data_init(&data, addr); | 4871 | perf_sample_data_init(&data, addr); |
5278 | 4872 | ||
5279 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | 4873 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); |
5280 | 4874 | ||
5281 | perf_swevent_put_recursion_context(rctx); | 4875 | perf_swevent_put_recursion_context(rctx); |
5282 | preempt_enable_notrace(); | 4876 | preempt_enable_notrace(); |
@@ -5524,7 +5118,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
5524 | 5118 | ||
5525 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 5119 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
5526 | if (perf_tp_event_match(event, &data, regs)) | 5120 | if (perf_tp_event_match(event, &data, regs)) |
5527 | perf_swevent_event(event, count, 1, &data, regs); | 5121 | perf_swevent_event(event, count, &data, regs); |
5528 | } | 5122 | } |
5529 | 5123 | ||
5530 | perf_swevent_put_recursion_context(rctx); | 5124 | perf_swevent_put_recursion_context(rctx); |
@@ -5617,7 +5211,7 @@ void perf_bp_event(struct perf_event *bp, void *data) | |||
5617 | perf_sample_data_init(&sample, bp->attr.bp_addr); | 5211 | perf_sample_data_init(&sample, bp->attr.bp_addr); |
5618 | 5212 | ||
5619 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) | 5213 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) |
5620 | perf_swevent_event(bp, 1, 1, &sample, regs); | 5214 | perf_swevent_event(bp, 1, &sample, regs); |
5621 | } | 5215 | } |
5622 | #endif | 5216 | #endif |
5623 | 5217 | ||
@@ -5646,7 +5240,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
5646 | 5240 | ||
5647 | if (regs && !perf_exclude_event(event, regs)) { | 5241 | if (regs && !perf_exclude_event(event, regs)) { |
5648 | if (!(event->attr.exclude_idle && current->pid == 0)) | 5242 | if (!(event->attr.exclude_idle && current->pid == 0)) |
5649 | if (perf_event_overflow(event, 0, &data, regs)) | 5243 | if (perf_event_overflow(event, &data, regs)) |
5650 | ret = HRTIMER_NORESTART; | 5244 | ret = HRTIMER_NORESTART; |
5651 | } | 5245 | } |
5652 | 5246 | ||
@@ -5986,6 +5580,7 @@ free_dev: | |||
5986 | } | 5580 | } |
5987 | 5581 | ||
5988 | static struct lock_class_key cpuctx_mutex; | 5582 | static struct lock_class_key cpuctx_mutex; |
5583 | static struct lock_class_key cpuctx_lock; | ||
5989 | 5584 | ||
5990 | int perf_pmu_register(struct pmu *pmu, char *name, int type) | 5585 | int perf_pmu_register(struct pmu *pmu, char *name, int type) |
5991 | { | 5586 | { |
@@ -6036,6 +5631,7 @@ skip_type: | |||
6036 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5631 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
6037 | __perf_event_init_context(&cpuctx->ctx); | 5632 | __perf_event_init_context(&cpuctx->ctx); |
6038 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); | 5633 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); |
5634 | lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); | ||
6039 | cpuctx->ctx.type = cpu_context; | 5635 | cpuctx->ctx.type = cpu_context; |
6040 | cpuctx->ctx.pmu = pmu; | 5636 | cpuctx->ctx.pmu = pmu; |
6041 | cpuctx->jiffies_interval = 1; | 5637 | cpuctx->jiffies_interval = 1; |
@@ -6150,7 +5746,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
6150 | struct task_struct *task, | 5746 | struct task_struct *task, |
6151 | struct perf_event *group_leader, | 5747 | struct perf_event *group_leader, |
6152 | struct perf_event *parent_event, | 5748 | struct perf_event *parent_event, |
6153 | perf_overflow_handler_t overflow_handler) | 5749 | perf_overflow_handler_t overflow_handler, |
5750 | void *context) | ||
6154 | { | 5751 | { |
6155 | struct pmu *pmu; | 5752 | struct pmu *pmu; |
6156 | struct perf_event *event; | 5753 | struct perf_event *event; |
@@ -6208,10 +5805,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
6208 | #endif | 5805 | #endif |
6209 | } | 5806 | } |
6210 | 5807 | ||
6211 | if (!overflow_handler && parent_event) | 5808 | if (!overflow_handler && parent_event) { |
6212 | overflow_handler = parent_event->overflow_handler; | 5809 | overflow_handler = parent_event->overflow_handler; |
5810 | context = parent_event->overflow_handler_context; | ||
5811 | } | ||
6213 | 5812 | ||
6214 | event->overflow_handler = overflow_handler; | 5813 | event->overflow_handler = overflow_handler; |
5814 | event->overflow_handler_context = context; | ||
6215 | 5815 | ||
6216 | if (attr->disabled) | 5816 | if (attr->disabled) |
6217 | event->state = PERF_EVENT_STATE_OFF; | 5817 | event->state = PERF_EVENT_STATE_OFF; |
@@ -6326,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
6326 | if (ret) | 5926 | if (ret) |
6327 | return -EFAULT; | 5927 | return -EFAULT; |
6328 | 5928 | ||
6329 | /* | ||
6330 | * If the type exists, the corresponding creation will verify | ||
6331 | * the attr->config. | ||
6332 | */ | ||
6333 | if (attr->type >= PERF_TYPE_MAX) | ||
6334 | return -EINVAL; | ||
6335 | |||
6336 | if (attr->__reserved_1) | 5929 | if (attr->__reserved_1) |
6337 | return -EINVAL; | 5930 | return -EINVAL; |
6338 | 5931 | ||
@@ -6354,7 +5947,7 @@ err_size: | |||
6354 | static int | 5947 | static int |
6355 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | 5948 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) |
6356 | { | 5949 | { |
6357 | struct perf_buffer *buffer = NULL, *old_buffer = NULL; | 5950 | struct ring_buffer *rb = NULL, *old_rb = NULL; |
6358 | int ret = -EINVAL; | 5951 | int ret = -EINVAL; |
6359 | 5952 | ||
6360 | if (!output_event) | 5953 | if (!output_event) |
@@ -6371,7 +5964,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | |||
6371 | goto out; | 5964 | goto out; |
6372 | 5965 | ||
6373 | /* | 5966 | /* |
6374 | * If its not a per-cpu buffer, it must be the same task. | 5967 | * If its not a per-cpu rb, it must be the same task. |
6375 | */ | 5968 | */ |
6376 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) | 5969 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) |
6377 | goto out; | 5970 | goto out; |
@@ -6383,20 +5976,20 @@ set: | |||
6383 | goto unlock; | 5976 | goto unlock; |
6384 | 5977 | ||
6385 | if (output_event) { | 5978 | if (output_event) { |
6386 | /* get the buffer we want to redirect to */ | 5979 | /* get the rb we want to redirect to */ |
6387 | buffer = perf_buffer_get(output_event); | 5980 | rb = ring_buffer_get(output_event); |
6388 | if (!buffer) | 5981 | if (!rb) |
6389 | goto unlock; | 5982 | goto unlock; |
6390 | } | 5983 | } |
6391 | 5984 | ||
6392 | old_buffer = event->buffer; | 5985 | old_rb = event->rb; |
6393 | rcu_assign_pointer(event->buffer, buffer); | 5986 | rcu_assign_pointer(event->rb, rb); |
6394 | ret = 0; | 5987 | ret = 0; |
6395 | unlock: | 5988 | unlock: |
6396 | mutex_unlock(&event->mmap_mutex); | 5989 | mutex_unlock(&event->mmap_mutex); |
6397 | 5990 | ||
6398 | if (old_buffer) | 5991 | if (old_rb) |
6399 | perf_buffer_put(old_buffer); | 5992 | ring_buffer_put(old_rb); |
6400 | out: | 5993 | out: |
6401 | return ret; | 5994 | return ret; |
6402 | } | 5995 | } |
@@ -6478,7 +6071,8 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6478 | } | 6071 | } |
6479 | } | 6072 | } |
6480 | 6073 | ||
6481 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); | 6074 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, |
6075 | NULL, NULL); | ||
6482 | if (IS_ERR(event)) { | 6076 | if (IS_ERR(event)) { |
6483 | err = PTR_ERR(event); | 6077 | err = PTR_ERR(event); |
6484 | goto err_task; | 6078 | goto err_task; |
@@ -6663,7 +6257,8 @@ err_fd: | |||
6663 | struct perf_event * | 6257 | struct perf_event * |
6664 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 6258 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, |
6665 | struct task_struct *task, | 6259 | struct task_struct *task, |
6666 | perf_overflow_handler_t overflow_handler) | 6260 | perf_overflow_handler_t overflow_handler, |
6261 | void *context) | ||
6667 | { | 6262 | { |
6668 | struct perf_event_context *ctx; | 6263 | struct perf_event_context *ctx; |
6669 | struct perf_event *event; | 6264 | struct perf_event *event; |
@@ -6673,7 +6268,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
6673 | * Get the target context (task or percpu): | 6268 | * Get the target context (task or percpu): |
6674 | */ | 6269 | */ |
6675 | 6270 | ||
6676 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); | 6271 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, |
6272 | overflow_handler, context); | ||
6677 | if (IS_ERR(event)) { | 6273 | if (IS_ERR(event)) { |
6678 | err = PTR_ERR(event); | 6274 | err = PTR_ERR(event); |
6679 | goto err; | 6275 | goto err; |
@@ -6780,7 +6376,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
6780 | * our context. | 6376 | * our context. |
6781 | */ | 6377 | */ |
6782 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); | 6378 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); |
6783 | task_ctx_sched_out(child_ctx, EVENT_ALL); | ||
6784 | 6379 | ||
6785 | /* | 6380 | /* |
6786 | * Take the context lock here so that if find_get_context is | 6381 | * Take the context lock here so that if find_get_context is |
@@ -6788,6 +6383,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
6788 | * incremented the context's refcount before we do put_ctx below. | 6383 | * incremented the context's refcount before we do put_ctx below. |
6789 | */ | 6384 | */ |
6790 | raw_spin_lock(&child_ctx->lock); | 6385 | raw_spin_lock(&child_ctx->lock); |
6386 | task_ctx_sched_out(child_ctx); | ||
6791 | child->perf_event_ctxp[ctxn] = NULL; | 6387 | child->perf_event_ctxp[ctxn] = NULL; |
6792 | /* | 6388 | /* |
6793 | * If this context is a clone; unclone it so it can't get | 6389 | * If this context is a clone; unclone it so it can't get |
@@ -6957,7 +6553,7 @@ inherit_event(struct perf_event *parent_event, | |||
6957 | parent_event->cpu, | 6553 | parent_event->cpu, |
6958 | child, | 6554 | child, |
6959 | group_leader, parent_event, | 6555 | group_leader, parent_event, |
6960 | NULL); | 6556 | NULL, NULL); |
6961 | if (IS_ERR(child_event)) | 6557 | if (IS_ERR(child_event)) |
6962 | return child_event; | 6558 | return child_event; |
6963 | get_ctx(child_ctx); | 6559 | get_ctx(child_ctx); |
@@ -6984,6 +6580,8 @@ inherit_event(struct perf_event *parent_event, | |||
6984 | 6580 | ||
6985 | child_event->ctx = child_ctx; | 6581 | child_event->ctx = child_ctx; |
6986 | child_event->overflow_handler = parent_event->overflow_handler; | 6582 | child_event->overflow_handler = parent_event->overflow_handler; |
6583 | child_event->overflow_handler_context | ||
6584 | = parent_event->overflow_handler_context; | ||
6987 | 6585 | ||
6988 | /* | 6586 | /* |
6989 | * Precalculate sample_data sizes | 6587 | * Precalculate sample_data sizes |
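
The core.c side of this merge drops the nmi argument all the way down the output path (perf_event_output(), __perf_event_overflow(), perf_swevent_event(), __perf_sw_event()): pending disable/wakeup work is now always deferred through irq_work_queue(), so callers no longer need to say whether they run in NMI context. perf_event_alloc() and perf_event_create_kernel_counter() also grow a void *context that is stored in event->overflow_handler_context and inherited by child events. A minimal kernel-side sketch of a caller after this change (my_overflow, my_hits and my_create are hypothetical names, and this is not a complete module):

```c
/* Sketch only, against the post-merge prototypes shown above. */
#include <linux/perf_event.h>

static long my_hits;				/* hypothetical counter */

/* The overflow callback loses its 'int nmi' parameter. */
static void my_overflow(struct perf_event *event,
			struct perf_sample_data *data,
			struct pt_regs *regs)
{
	long *hits = event->overflow_handler_context;	/* context given at creation */

	(*hits)++;	/* toy bookkeeping; real code would use atomic/local ops */
}

static struct perf_event *my_create(struct perf_event_attr *attr, int cpu)
{
	/* The trailing context pointer is the new argument. */
	return perf_event_create_kernel_counter(attr, cpu, NULL,
						my_overflow, &my_hits);
}
```
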
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 086adf25a55e..b7971d6f38bf 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -431,9 +431,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp) | |||
431 | struct perf_event * | 431 | struct perf_event * |
432 | register_user_hw_breakpoint(struct perf_event_attr *attr, | 432 | register_user_hw_breakpoint(struct perf_event_attr *attr, |
433 | perf_overflow_handler_t triggered, | 433 | perf_overflow_handler_t triggered, |
434 | void *context, | ||
434 | struct task_struct *tsk) | 435 | struct task_struct *tsk) |
435 | { | 436 | { |
436 | return perf_event_create_kernel_counter(attr, -1, tsk, triggered); | 437 | return perf_event_create_kernel_counter(attr, -1, tsk, triggered, |
438 | context); | ||
437 | } | 439 | } |
438 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | 440 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); |
439 | 441 | ||
@@ -502,7 +504,8 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); | |||
502 | */ | 504 | */ |
503 | struct perf_event * __percpu * | 505 | struct perf_event * __percpu * |
504 | register_wide_hw_breakpoint(struct perf_event_attr *attr, | 506 | register_wide_hw_breakpoint(struct perf_event_attr *attr, |
505 | perf_overflow_handler_t triggered) | 507 | perf_overflow_handler_t triggered, |
508 | void *context) | ||
506 | { | 509 | { |
507 | struct perf_event * __percpu *cpu_events, **pevent, *bp; | 510 | struct perf_event * __percpu *cpu_events, **pevent, *bp; |
508 | long err; | 511 | long err; |
@@ -515,7 +518,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
515 | get_online_cpus(); | 518 | get_online_cpus(); |
516 | for_each_online_cpu(cpu) { | 519 | for_each_online_cpu(cpu) { |
517 | pevent = per_cpu_ptr(cpu_events, cpu); | 520 | pevent = per_cpu_ptr(cpu_events, cpu); |
518 | bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); | 521 | bp = perf_event_create_kernel_counter(attr, cpu, NULL, |
522 | triggered, context); | ||
519 | 523 | ||
520 | *pevent = bp; | 524 | *pevent = bp; |
521 | 525 | ||
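
register_user_hw_breakpoint() and register_wide_hw_breakpoint() now take a void *context between the triggered callback and the task (or in place of it, for the wide variant) and simply forward it to perf_event_create_kernel_counter(), so a breakpoint handler can find its private data later. A hedged sketch of a caller under the new prototypes (watched_var, my_bp_handler and my_watch are made up for the illustration; this is not a complete module):

```c
/* Sketch only, using the updated register_user_hw_breakpoint() prototype. */
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>

static unsigned long watched_var;		/* hypothetical variable to watch */

static void my_bp_handler(struct perf_event *bp,
			  struct perf_sample_data *data,
			  struct pt_regs *regs)
{
	/* bp->overflow_handler_context is the pointer registered below. */
}

static struct perf_event *my_watch(struct task_struct *tsk)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = (unsigned long)&watched_var;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* New: the context argument sits between the handler and the task. */
	return register_user_hw_breakpoint(&attr, my_bp_handler, &watched_var, tsk);
}
```
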
diff --git a/kernel/events/internal.h b/kernel/events/internal.h new file mode 100644 index 000000000000..09097dd8116c --- /dev/null +++ b/kernel/events/internal.h | |||
@@ -0,0 +1,96 @@ | |||
1 | #ifndef _KERNEL_EVENTS_INTERNAL_H | ||
2 | #define _KERNEL_EVENTS_INTERNAL_H | ||
3 | |||
4 | #define RING_BUFFER_WRITABLE 0x01 | ||
5 | |||
6 | struct ring_buffer { | ||
7 | atomic_t refcount; | ||
8 | struct rcu_head rcu_head; | ||
9 | #ifdef CONFIG_PERF_USE_VMALLOC | ||
10 | struct work_struct work; | ||
11 | int page_order; /* allocation order */ | ||
12 | #endif | ||
13 | int nr_pages; /* nr of data pages */ | ||
14 | int writable; /* are we writable */ | ||
15 | |||
16 | atomic_t poll; /* POLL_ for wakeups */ | ||
17 | |||
18 | local_t head; /* write position */ | ||
19 | local_t nest; /* nested writers */ | ||
20 | local_t events; /* event limit */ | ||
21 | local_t wakeup; /* wakeup stamp */ | ||
22 | local_t lost; /* nr records lost */ | ||
23 | |||
24 | long watermark; /* wakeup watermark */ | ||
25 | |||
26 | struct perf_event_mmap_page *user_page; | ||
27 | void *data_pages[0]; | ||
28 | }; | ||
29 | |||
30 | extern void rb_free(struct ring_buffer *rb); | ||
31 | extern struct ring_buffer * | ||
32 | rb_alloc(int nr_pages, long watermark, int cpu, int flags); | ||
33 | extern void perf_event_wakeup(struct perf_event *event); | ||
34 | |||
35 | extern void | ||
36 | perf_event_header__init_id(struct perf_event_header *header, | ||
37 | struct perf_sample_data *data, | ||
38 | struct perf_event *event); | ||
39 | extern void | ||
40 | perf_event__output_id_sample(struct perf_event *event, | ||
41 | struct perf_output_handle *handle, | ||
42 | struct perf_sample_data *sample); | ||
43 | |||
44 | extern struct page * | ||
45 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); | ||
46 | |||
47 | #ifdef CONFIG_PERF_USE_VMALLOC | ||
48 | /* | ||
49 | * Back perf_mmap() with vmalloc memory. | ||
50 | * | ||
51 | * Required for architectures that have d-cache aliasing issues. | ||
52 | */ | ||
53 | |||
54 | static inline int page_order(struct ring_buffer *rb) | ||
55 | { | ||
56 | return rb->page_order; | ||
57 | } | ||
58 | |||
59 | #else | ||
60 | |||
61 | static inline int page_order(struct ring_buffer *rb) | ||
62 | { | ||
63 | return 0; | ||
64 | } | ||
65 | #endif | ||
66 | |||
67 | static unsigned long perf_data_size(struct ring_buffer *rb) | ||
68 | { | ||
69 | return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); | ||
70 | } | ||
71 | |||
72 | static inline void | ||
73 | __output_copy(struct perf_output_handle *handle, | ||
74 | const void *buf, unsigned int len) | ||
75 | { | ||
76 | do { | ||
77 | unsigned long size = min_t(unsigned long, handle->size, len); | ||
78 | |||
79 | memcpy(handle->addr, buf, size); | ||
80 | |||
81 | len -= size; | ||
82 | handle->addr += size; | ||
83 | buf += size; | ||
84 | handle->size -= size; | ||
85 | if (!handle->size) { | ||
86 | struct ring_buffer *rb = handle->rb; | ||
87 | |||
88 | handle->page++; | ||
89 | handle->page &= rb->nr_pages - 1; | ||
90 | handle->addr = rb->data_pages[handle->page]; | ||
91 | handle->size = PAGE_SIZE << page_order(rb); | ||
92 | } | ||
93 | } while (len); | ||
94 | } | ||
95 | |||
96 | #endif /* _KERNEL_EVENTS_INTERNAL_H */ | ||
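
__output_copy() (and the perf_output_copy() wrapper that now lives in ring_buffer.c) leans on two invariants: rb->nr_pages is a power of two, so the page index can wrap with a mask instead of a modulo, and handle->size always holds the bytes left in the current page. A self-contained userspace model of the same chunk-and-wrap loop, with the page size shrunk to 8 bytes and all names (demo_handle, demo_copy, PAGE_SZ, NR_PAGES) invented for the demo:

```c
/* Userspace model of the __output_copy() wrap logic; not kernel code. */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SZ   8UL			/* stand-in for PAGE_SIZE << page_order(rb) */
#define NR_PAGES  4			/* must be a power of two */

static char pages[NR_PAGES][PAGE_SZ];

struct demo_handle {
	int page;			/* current data page */
	char *addr;			/* write cursor inside that page */
	unsigned long size;		/* bytes left in the current page */
};

static void demo_copy(struct demo_handle *h, const void *buf, unsigned int len)
{
	do {
		unsigned long size = h->size < len ? h->size : len;

		memcpy(h->addr, buf, size);
		len -= size;
		h->addr += size;
		buf = (const char *)buf + size;
		h->size -= size;
		if (!h->size) {			/* page exhausted: advance and wrap */
			h->page++;
			h->page &= NR_PAGES - 1;	/* works because NR_PAGES is 2^n */
			h->addr = pages[h->page];
			h->size = PAGE_SZ;
		}
	} while (len);
}

int main(void)
{
	struct demo_handle h = { .page = 0, .addr = pages[0] + 5, .size = PAGE_SZ - 5 };

	demo_copy(&h, "abcdefghij", 10);	/* 3 bytes land in page 0, 7 in page 1 */
	assert(memcmp(pages[0] + 5, "abc", 3) == 0);
	assert(memcmp(pages[1], "defghij", 7) == 0);
	printf("cursor now at page %d, %lu bytes left\n", h.page, h.size);
	return 0;
}
```
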
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c new file mode 100644 index 000000000000..a2a29205cc0f --- /dev/null +++ b/kernel/events/ring_buffer.c | |||
@@ -0,0 +1,380 @@ | |||
1 | /* | ||
2 | * Performance events ring-buffer code: | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | ||
8 | * | ||
9 | * For licensing details see kernel-base/COPYING | ||
10 | */ | ||
11 | |||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/vmalloc.h> | ||
14 | #include <linux/slab.h> | ||
15 | |||
16 | #include "internal.h" | ||
17 | |||
18 | static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, | ||
19 | unsigned long offset, unsigned long head) | ||
20 | { | ||
21 | unsigned long mask; | ||
22 | |||
23 | if (!rb->writable) | ||
24 | return true; | ||
25 | |||
26 | mask = perf_data_size(rb) - 1; | ||
27 | |||
28 | offset = (offset - tail) & mask; | ||
29 | head = (head - tail) & mask; | ||
30 | |||
31 | if ((int)(head - offset) < 0) | ||
32 | return false; | ||
33 | |||
34 | return true; | ||
35 | } | ||
36 | |||
37 | static void perf_output_wakeup(struct perf_output_handle *handle) | ||
38 | { | ||
39 | atomic_set(&handle->rb->poll, POLL_IN); | ||
40 | |||
41 | handle->event->pending_wakeup = 1; | ||
42 | irq_work_queue(&handle->event->pending); | ||
43 | } | ||
44 | |||
45 | /* | ||
46 | * We need to ensure a later event_id doesn't publish a head when a former | ||
47 | * event isn't done writing. However since we need to deal with NMIs we | ||
48 | * cannot fully serialize things. | ||
49 | * | ||
50 | * We only publish the head (and generate a wakeup) when the outer-most | ||
51 | * event completes. | ||
52 | */ | ||
53 | static void perf_output_get_handle(struct perf_output_handle *handle) | ||
54 | { | ||
55 | struct ring_buffer *rb = handle->rb; | ||
56 | |||
57 | preempt_disable(); | ||
58 | local_inc(&rb->nest); | ||
59 | handle->wakeup = local_read(&rb->wakeup); | ||
60 | } | ||
61 | |||
62 | static void perf_output_put_handle(struct perf_output_handle *handle) | ||
63 | { | ||
64 | struct ring_buffer *rb = handle->rb; | ||
65 | unsigned long head; | ||
66 | |||
67 | again: | ||
68 | head = local_read(&rb->head); | ||
69 | |||
70 | /* | ||
71 | * IRQ/NMI can happen here, which means we can miss a head update. | ||
72 | */ | ||
73 | |||
74 | if (!local_dec_and_test(&rb->nest)) | ||
75 | goto out; | ||
76 | |||
77 | /* | ||
78 | * Publish the known good head. Rely on the full barrier implied | ||
79 | * by atomic_dec_and_test() to order the rb->head read and this | ||
80 | * write. | ||
81 | */ | ||
82 | rb->user_page->data_head = head; | ||
83 | |||
84 | /* | ||
85 | * Now check if we missed an update, rely on the (compiler) | ||
86 | * barrier in atomic_dec_and_test() to re-read rb->head. | ||
87 | */ | ||
88 | if (unlikely(head != local_read(&rb->head))) { | ||
89 | local_inc(&rb->nest); | ||
90 | goto again; | ||
91 | } | ||
92 | |||
93 | if (handle->wakeup != local_read(&rb->wakeup)) | ||
94 | perf_output_wakeup(handle); | ||
95 | |||
96 | out: | ||
97 | preempt_enable(); | ||
98 | } | ||
99 | |||
100 | int perf_output_begin(struct perf_output_handle *handle, | ||
101 | struct perf_event *event, unsigned int size) | ||
102 | { | ||
103 | struct ring_buffer *rb; | ||
104 | unsigned long tail, offset, head; | ||
105 | int have_lost; | ||
106 | struct perf_sample_data sample_data; | ||
107 | struct { | ||
108 | struct perf_event_header header; | ||
109 | u64 id; | ||
110 | u64 lost; | ||
111 | } lost_event; | ||
112 | |||
113 | rcu_read_lock(); | ||
114 | /* | ||
115 | * For inherited events we send all the output towards the parent. | ||
116 | */ | ||
117 | if (event->parent) | ||
118 | event = event->parent; | ||
119 | |||
120 | rb = rcu_dereference(event->rb); | ||
121 | if (!rb) | ||
122 | goto out; | ||
123 | |||
124 | handle->rb = rb; | ||
125 | handle->event = event; | ||
126 | |||
127 | if (!rb->nr_pages) | ||
128 | goto out; | ||
129 | |||
130 | have_lost = local_read(&rb->lost); | ||
131 | if (have_lost) { | ||
132 | lost_event.header.size = sizeof(lost_event); | ||
133 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
134 | event); | ||
135 | size += lost_event.header.size; | ||
136 | } | ||
137 | |||
138 | perf_output_get_handle(handle); | ||
139 | |||
140 | do { | ||
141 | /* | ||
142 | * Userspace could choose to issue a mb() before updating the | ||
143 | * tail pointer, so that all reads will be completed before the | ||
144 | * write is issued. | ||
145 | */ | ||
146 | tail = ACCESS_ONCE(rb->user_page->data_tail); | ||
147 | smp_rmb(); | ||
148 | offset = head = local_read(&rb->head); | ||
149 | head += size; | ||
150 | if (unlikely(!perf_output_space(rb, tail, offset, head))) | ||
151 | goto fail; | ||
152 | } while (local_cmpxchg(&rb->head, offset, head) != offset); | ||
153 | |||
154 | if (head - local_read(&rb->wakeup) > rb->watermark) | ||
155 | local_add(rb->watermark, &rb->wakeup); | ||
156 | |||
157 | handle->page = offset >> (PAGE_SHIFT + page_order(rb)); | ||
158 | handle->page &= rb->nr_pages - 1; | ||
159 | handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1); | ||
160 | handle->addr = rb->data_pages[handle->page]; | ||
161 | handle->addr += handle->size; | ||
162 | handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; | ||
163 | |||
164 | if (have_lost) { | ||
165 | lost_event.header.type = PERF_RECORD_LOST; | ||
166 | lost_event.header.misc = 0; | ||
167 | lost_event.id = event->id; | ||
168 | lost_event.lost = local_xchg(&rb->lost, 0); | ||
169 | |||
170 | perf_output_put(handle, lost_event); | ||
171 | perf_event__output_id_sample(event, handle, &sample_data); | ||
172 | } | ||
173 | |||
174 | return 0; | ||
175 | |||
176 | fail: | ||
177 | local_inc(&rb->lost); | ||
178 | perf_output_put_handle(handle); | ||
179 | out: | ||
180 | rcu_read_unlock(); | ||
181 | |||
182 | return -ENOSPC; | ||
183 | } | ||
184 | |||
185 | void perf_output_copy(struct perf_output_handle *handle, | ||
186 | const void *buf, unsigned int len) | ||
187 | { | ||
188 | __output_copy(handle, buf, len); | ||
189 | } | ||
190 | |||
191 | void perf_output_end(struct perf_output_handle *handle) | ||
192 | { | ||
193 | perf_output_put_handle(handle); | ||
194 | rcu_read_unlock(); | ||
195 | } | ||
196 | |||
197 | static void | ||
198 | ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) | ||
199 | { | ||
200 | long max_size = perf_data_size(rb); | ||
201 | |||
202 | if (watermark) | ||
203 | rb->watermark = min(max_size, watermark); | ||
204 | |||
205 | if (!rb->watermark) | ||
206 | rb->watermark = max_size / 2; | ||
207 | |||
208 | if (flags & RING_BUFFER_WRITABLE) | ||
209 | rb->writable = 1; | ||
210 | |||
211 | atomic_set(&rb->refcount, 1); | ||
212 | } | ||
213 | |||
214 | #ifndef CONFIG_PERF_USE_VMALLOC | ||
215 | |||
216 | /* | ||
217 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. | ||
218 | */ | ||
219 | |||
220 | struct page * | ||
221 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | ||
222 | { | ||
223 | if (pgoff > rb->nr_pages) | ||
224 | return NULL; | ||
225 | |||
226 | if (pgoff == 0) | ||
227 | return virt_to_page(rb->user_page); | ||
228 | |||
229 | return virt_to_page(rb->data_pages[pgoff - 1]); | ||
230 | } | ||
231 | |||
232 | static void *perf_mmap_alloc_page(int cpu) | ||
233 | { | ||
234 | struct page *page; | ||
235 | int node; | ||
236 | |||
237 | node = (cpu == -1) ? cpu : cpu_to_node(cpu); | ||
238 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | ||
239 | if (!page) | ||
240 | return NULL; | ||
241 | |||
242 | return page_address(page); | ||
243 | } | ||
244 | |||
245 | struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
246 | { | ||
247 | struct ring_buffer *rb; | ||
248 | unsigned long size; | ||
249 | int i; | ||
250 | |||
251 | size = sizeof(struct ring_buffer); | ||
252 | size += nr_pages * sizeof(void *); | ||
253 | |||
254 | rb = kzalloc(size, GFP_KERNEL); | ||
255 | if (!rb) | ||
256 | goto fail; | ||
257 | |||
258 | rb->user_page = perf_mmap_alloc_page(cpu); | ||
259 | if (!rb->user_page) | ||
260 | goto fail_user_page; | ||
261 | |||
262 | for (i = 0; i < nr_pages; i++) { | ||
263 | rb->data_pages[i] = perf_mmap_alloc_page(cpu); | ||
264 | if (!rb->data_pages[i]) | ||
265 | goto fail_data_pages; | ||
266 | } | ||
267 | |||
268 | rb->nr_pages = nr_pages; | ||
269 | |||
270 | ring_buffer_init(rb, watermark, flags); | ||
271 | |||
272 | return rb; | ||
273 | |||
274 | fail_data_pages: | ||
275 | for (i--; i >= 0; i--) | ||
276 | free_page((unsigned long)rb->data_pages[i]); | ||
277 | |||
278 | free_page((unsigned long)rb->user_page); | ||
279 | |||
280 | fail_user_page: | ||
281 | kfree(rb); | ||
282 | |||
283 | fail: | ||
284 | return NULL; | ||
285 | } | ||
286 | |||
287 | static void perf_mmap_free_page(unsigned long addr) | ||
288 | { | ||
289 | struct page *page = virt_to_page((void *)addr); | ||
290 | |||
291 | page->mapping = NULL; | ||
292 | __free_page(page); | ||
293 | } | ||
294 | |||
295 | void rb_free(struct ring_buffer *rb) | ||
296 | { | ||
297 | int i; | ||
298 | |||
299 | perf_mmap_free_page((unsigned long)rb->user_page); | ||
300 | for (i = 0; i < rb->nr_pages; i++) | ||
301 | perf_mmap_free_page((unsigned long)rb->data_pages[i]); | ||
302 | kfree(rb); | ||
303 | } | ||
304 | |||
305 | #else | ||
306 | |||
307 | struct page * | ||
308 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | ||
309 | { | ||
310 | if (pgoff > (1UL << page_order(rb))) | ||
311 | return NULL; | ||
312 | |||
313 | return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); | ||
314 | } | ||
315 | |||
316 | static void perf_mmap_unmark_page(void *addr) | ||
317 | { | ||
318 | struct page *page = vmalloc_to_page(addr); | ||
319 | |||
320 | page->mapping = NULL; | ||
321 | } | ||
322 | |||
323 | static void rb_free_work(struct work_struct *work) | ||
324 | { | ||
325 | struct ring_buffer *rb; | ||
326 | void *base; | ||
327 | int i, nr; | ||
328 | |||
329 | rb = container_of(work, struct ring_buffer, work); | ||
330 | nr = 1 << page_order(rb); | ||
331 | |||
332 | base = rb->user_page; | ||
333 | for (i = 0; i < nr + 1; i++) | ||
334 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
335 | |||
336 | vfree(base); | ||
337 | kfree(rb); | ||
338 | } | ||
339 | |||
340 | void rb_free(struct ring_buffer *rb) | ||
341 | { | ||
342 | schedule_work(&rb->work); | ||
343 | } | ||
344 | |||
345 | struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
346 | { | ||
347 | struct ring_buffer *rb; | ||
348 | unsigned long size; | ||
349 | void *all_buf; | ||
350 | |||
351 | size = sizeof(struct ring_buffer); | ||
352 | size += sizeof(void *); | ||
353 | |||
354 | rb = kzalloc(size, GFP_KERNEL); | ||
355 | if (!rb) | ||
356 | goto fail; | ||
357 | |||
358 | INIT_WORK(&rb->work, rb_free_work); | ||
359 | |||
360 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
361 | if (!all_buf) | ||
362 | goto fail_all_buf; | ||
363 | |||
364 | rb->user_page = all_buf; | ||
365 | rb->data_pages[0] = all_buf + PAGE_SIZE; | ||
366 | rb->page_order = ilog2(nr_pages); | ||
367 | rb->nr_pages = 1; | ||
368 | |||
369 | ring_buffer_init(rb, watermark, flags); | ||
370 | |||
371 | return rb; | ||
372 | |||
373 | fail_all_buf: | ||
374 | kfree(rb); | ||
375 | |||
376 | fail: | ||
377 | return NULL; | ||
378 | } | ||
379 | |||
380 | #endif | ||
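
The nest counter above is what lets an IRQ or NMI landing between perf_output_begin() and perf_output_end() write its own record without publishing a half-finished data_head: only the outermost writer copies rb->head into the user page, and the re-read loop repeats the publish if a nested writer advanced the head in the meantime. A single-threaded userspace model of that control flow (no barriers or real concurrency; demo_rb, get_handle and put_handle are stand-ins, not the kernel functions):

```c
/* Single-threaded model of the nest/publish scheme in perf_output_put_handle(). */
#include <assert.h>
#include <stdio.h>

struct demo_rb {
	long head;		/* write position, advanced by every writer */
	long nest;		/* how many writers are currently inside */
	long data_head;		/* what user space gets to see */
};

static void get_handle(struct demo_rb *rb)
{
	rb->nest++;
}

static void put_handle(struct demo_rb *rb)
{
	long head;
again:
	head = rb->head;
	if (--rb->nest)			/* inner writer: leave publication to the outer one */
		return;
	rb->data_head = head;		/* outermost writer publishes the known-good head */
	if (head != rb->head) {		/* a nested update slipped in: redo the publish */
		rb->nest++;
		goto again;
	}
}

int main(void)
{
	struct demo_rb rb = { 0, 0, 0 };

	get_handle(&rb);		/* outer writer reserves space */
	rb.head += 100;

	get_handle(&rb);		/* pretend an NMI writer lands here */
	rb.head += 20;
	put_handle(&rb);		/* inner writer must not publish */
	assert(rb.data_head == 0);

	put_handle(&rb);		/* outer writer publishes everything */
	assert(rb.data_head == 120);
	printf("published head = %ld\n", rb.data_head);
	return 0;
}
```
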
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 77981813a1e7..b30fd54eb985 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr) | |||
1255 | /* | 1255 | /* |
1256 | * If we have a symbol_name argument, look it up and add the offset field | 1256 | * If we have a symbol_name argument, look it up and add the offset field |
1257 | * to it. This way, we can specify a relative address to a symbol. | 1257 | * to it. This way, we can specify a relative address to a symbol. |
1258 | * This returns an encoded error if it fails to look up the symbol or if the | ||
1259 | * combination of parameters is invalid. | ||
1258 | */ | 1260 | */ |
1259 | static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) | 1261 | static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) |
1260 | { | 1262 | { |
1261 | kprobe_opcode_t *addr = p->addr; | 1263 | kprobe_opcode_t *addr = p->addr; |
1264 | |||
1265 | if ((p->symbol_name && p->addr) || | ||
1266 | (!p->symbol_name && !p->addr)) | ||
1267 | goto invalid; | ||
1268 | |||
1262 | if (p->symbol_name) { | 1269 | if (p->symbol_name) { |
1263 | if (addr) | ||
1264 | return NULL; | ||
1265 | kprobe_lookup_name(p->symbol_name, addr); | 1270 | kprobe_lookup_name(p->symbol_name, addr); |
1271 | if (!addr) | ||
1272 | return ERR_PTR(-ENOENT); | ||
1266 | } | 1273 | } |
1267 | 1274 | ||
1268 | if (!addr) | 1275 | addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); |
1269 | return NULL; | 1276 | if (addr) |
1270 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); | 1277 | return addr; |
1278 | |||
1279 | invalid: | ||
1280 | return ERR_PTR(-EINVAL); | ||
1271 | } | 1281 | } |
1272 | 1282 | ||
1273 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ | 1283 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ |
@@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1311 | kprobe_opcode_t *addr; | 1321 | kprobe_opcode_t *addr; |
1312 | 1322 | ||
1313 | addr = kprobe_addr(p); | 1323 | addr = kprobe_addr(p); |
1314 | if (!addr) | 1324 | if (IS_ERR(addr)) |
1315 | return -EINVAL; | 1325 | return PTR_ERR(addr); |
1316 | p->addr = addr; | 1326 | p->addr = addr; |
1317 | 1327 | ||
1318 | ret = check_kprobe_rereg(p); | 1328 | ret = check_kprobe_rereg(p); |
@@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1335 | */ | 1345 | */ |
1336 | probed_mod = __module_text_address((unsigned long) p->addr); | 1346 | probed_mod = __module_text_address((unsigned long) p->addr); |
1337 | if (probed_mod) { | 1347 | if (probed_mod) { |
1348 | /* Return -ENOENT if fail. */ | ||
1349 | ret = -ENOENT; | ||
1338 | /* | 1350 | /* |
1339 | * We must hold a refcount of the probed module while updating | 1351 | * We must hold a refcount of the probed module while updating |
1340 | * its code to prohibit unexpected unloading. | 1352 | * its code to prohibit unexpected unloading. |
@@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1351 | module_put(probed_mod); | 1363 | module_put(probed_mod); |
1352 | goto fail_with_jump_label; | 1364 | goto fail_with_jump_label; |
1353 | } | 1365 | } |
1366 | /* ret will be updated by the following code */ | ||
1354 | } | 1367 | } |
1355 | preempt_enable(); | 1368 | preempt_enable(); |
1356 | jump_label_unlock(); | 1369 | jump_label_unlock(); |
@@ -1399,7 +1412,7 @@ out: | |||
1399 | fail_with_jump_label: | 1412 | fail_with_jump_label: |
1400 | preempt_enable(); | 1413 | preempt_enable(); |
1401 | jump_label_unlock(); | 1414 | jump_label_unlock(); |
1402 | return -EINVAL; | 1415 | return ret; |
1403 | } | 1416 | } |
1404 | EXPORT_SYMBOL_GPL(register_kprobe); | 1417 | EXPORT_SYMBOL_GPL(register_kprobe); |
1405 | 1418 | ||
@@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
1686 | 1699 | ||
1687 | if (kretprobe_blacklist_size) { | 1700 | if (kretprobe_blacklist_size) { |
1688 | addr = kprobe_addr(&rp->kp); | 1701 | addr = kprobe_addr(&rp->kp); |
1689 | if (!addr) | 1702 | if (IS_ERR(addr)) |
1690 | return -EINVAL; | 1703 | return PTR_ERR(addr); |
1691 | 1704 | ||
1692 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { | 1705 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { |
1693 | if (kretprobe_blacklist[i].addr == addr) | 1706 | if (kretprobe_blacklist[i].addr == addr) |
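
The kprobes change switches kprobe_addr() from returning NULL on any failure to returning pointer-encoded errors, so register_kprobe() and register_kretprobe() can propagate -ENOENT ("probe point doesn't exist") separately from -EINVAL (a conflicting symbol_name/addr combination). A userspace sketch of the ERR_PTR()/IS_ERR()/PTR_ERR() convention used here, with simplified stand-ins for the <linux/err.h> macros and an invented lookup_symbol()/register_demo_probe() pair:

```c
/* Userspace stand-ins for the <linux/err.h> helpers; simplified for the demo. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static int probed_site;			/* pretend this is a kernel text address */

static void *lookup_symbol(const char *name, unsigned long offset)
{
	if (!name)
		return ERR_PTR(-EINVAL);	/* bad argument combination */
	if (strcmp(name, "probed_site") != 0)
		return ERR_PTR(-ENOENT);	/* symbol does not exist */
	return (char *)&probed_site + offset;
}

static int register_demo_probe(const char *name)
{
	void *addr = lookup_symbol(name, 0);

	if (IS_ERR(addr))			/* propagate the encoded errno, as */
		return (int)PTR_ERR(addr);	/* register_kprobe() now does      */
	printf("probe armed at %p\n", addr);
	return 0;
}

int main(void)
{
	printf("%d\n", register_demo_probe("probed_site"));	/* 0 */
	printf("%d\n", register_demo_probe("no_such_sym"));	/* -ENOENT */
	printf("%d\n", register_demo_probe(NULL));		/* -EINVAL */
	return 0;
}
```
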
diff --git a/kernel/sched.c b/kernel/sched.c index c518b05fd062..84b9e076812e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2220 | 2220 | ||
2221 | if (task_cpu(p) != new_cpu) { | 2221 | if (task_cpu(p) != new_cpu) { |
2222 | p->se.nr_migrations++; | 2222 | p->se.nr_migrations++; |
2223 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); | 2223 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); |
2224 | } | 2224 | } |
2225 | 2225 | ||
2226 | __set_task_cpu(p, new_cpu); | 2226 | __set_task_cpu(p, new_cpu); |
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index eb212f8f8bc8..d20c6983aad9 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c | |||
@@ -26,12 +26,18 @@ void print_stack_trace(struct stack_trace *trace, int spaces) | |||
26 | EXPORT_SYMBOL_GPL(print_stack_trace); | 26 | EXPORT_SYMBOL_GPL(print_stack_trace); |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Architectures that do not implement save_stack_trace_tsk get this | 29 | * Architectures that do not implement save_stack_trace_tsk or |
30 | * weak alias and a once-per-bootup warning (whenever this facility | 30 | * save_stack_trace_regs get this weak alias and a once-per-bootup warning |
31 | * is utilized - for example by procfs): | 31 | * (whenever this facility is utilized - for example by procfs): |
32 | */ | 32 | */ |
33 | __weak void | 33 | __weak void |
34 | save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 34 | save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
35 | { | 35 | { |
36 | WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n"); | 36 | WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n"); |
37 | } | 37 | } |
38 | |||
39 | __weak void | ||
40 | save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) | ||
41 | { | ||
42 | WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n"); | ||
43 | } | ||
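The new save_stack_trace_regs() stub relies on weak linkage: the generic file provides a __weak default that only warns once, and any architecture that implements the function supplies a strong definition that silently overrides it at link time, with no #ifdef in the callers. A small sketch of the idiom with an invented symbol (example_arch_hook() is purely illustrative):

#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/compiler.h>

/*
 * Generic fallback: linked in only when no architecture provides
 * its own (strong) example_arch_hook() definition.
 */
__weak void example_arch_hook(void)
{
        WARN_ONCE(1, "example_arch_hook() not implemented on this arch\n");
}

An architecture that supports the facility simply defines a non-weak example_arch_hook() in its own code, and that definition wins at link time.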
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 908038f57440..c3e4575e7829 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -32,7 +32,6 @@ | |||
32 | 32 | ||
33 | #include <trace/events/sched.h> | 33 | #include <trace/events/sched.h> |
34 | 34 | ||
35 | #include <asm/ftrace.h> | ||
36 | #include <asm/setup.h> | 35 | #include <asm/setup.h> |
37 | 36 | ||
38 | #include "trace_output.h" | 37 | #include "trace_output.h" |
@@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly; | |||
82 | 81 | ||
83 | static DEFINE_MUTEX(ftrace_lock); | 82 | static DEFINE_MUTEX(ftrace_lock); |
84 | 83 | ||
85 | static struct ftrace_ops ftrace_list_end __read_mostly = | 84 | static struct ftrace_ops ftrace_list_end __read_mostly = { |
86 | { | ||
87 | .func = ftrace_stub, | 85 | .func = ftrace_stub, |
88 | }; | 86 | }; |
89 | 87 | ||
90 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; | 88 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; |
91 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; | 89 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; |
92 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; | 90 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; |
91 | static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; | ||
93 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; | 92 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; |
94 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; | 93 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; |
95 | static struct ftrace_ops global_ops; | 94 | static struct ftrace_ops global_ops; |
@@ -148,9 +147,11 @@ void clear_ftrace_function(void) | |||
148 | { | 147 | { |
149 | ftrace_trace_function = ftrace_stub; | 148 | ftrace_trace_function = ftrace_stub; |
150 | __ftrace_trace_function = ftrace_stub; | 149 | __ftrace_trace_function = ftrace_stub; |
150 | __ftrace_trace_function_delay = ftrace_stub; | ||
151 | ftrace_pid_function = ftrace_stub; | 151 | ftrace_pid_function = ftrace_stub; |
152 | } | 152 | } |
153 | 153 | ||
154 | #undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
154 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | 155 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST |
155 | /* | 156 | /* |
156 | * For those archs that do not test ftrace_trace_stop in their | 157 | * For those archs that do not test ftrace_trace_stop in their |
@@ -210,7 +211,12 @@ static void update_ftrace_function(void) | |||
210 | #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | 211 | #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST |
211 | ftrace_trace_function = func; | 212 | ftrace_trace_function = func; |
212 | #else | 213 | #else |
214 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
215 | /* do not update till all functions have been modified */ | ||
216 | __ftrace_trace_function_delay = func; | ||
217 | #else | ||
213 | __ftrace_trace_function = func; | 218 | __ftrace_trace_function = func; |
219 | #endif | ||
214 | ftrace_trace_function = ftrace_test_stop_func; | 220 | ftrace_trace_function = ftrace_test_stop_func; |
215 | #endif | 221 | #endif |
216 | } | 222 | } |
@@ -785,8 +791,7 @@ static void unregister_ftrace_profiler(void) | |||
785 | unregister_ftrace_graph(); | 791 | unregister_ftrace_graph(); |
786 | } | 792 | } |
787 | #else | 793 | #else |
788 | static struct ftrace_ops ftrace_profile_ops __read_mostly = | 794 | static struct ftrace_ops ftrace_profile_ops __read_mostly = { |
789 | { | ||
790 | .func = function_profile_call, | 795 | .func = function_profile_call, |
791 | }; | 796 | }; |
792 | 797 | ||
@@ -806,19 +811,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, | |||
806 | size_t cnt, loff_t *ppos) | 811 | size_t cnt, loff_t *ppos) |
807 | { | 812 | { |
808 | unsigned long val; | 813 | unsigned long val; |
809 | char buf[64]; /* big enough to hold a number */ | ||
810 | int ret; | 814 | int ret; |
811 | 815 | ||
812 | if (cnt >= sizeof(buf)) | 816 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
813 | return -EINVAL; | 817 | if (ret) |
814 | |||
815 | if (copy_from_user(&buf, ubuf, cnt)) | ||
816 | return -EFAULT; | ||
817 | |||
818 | buf[cnt] = 0; | ||
819 | |||
820 | ret = strict_strtoul(buf, 10, &val); | ||
821 | if (ret < 0) | ||
822 | return ret; | 818 | return ret; |
823 | 819 | ||
824 | val = !!val; | 820 | val = !!val; |
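This hunk is the first of several in the series that collapse the open-coded copy_from_user() plus strict_strtoul() sequence into a single kstrtoul_from_user() call, which bounds-checks, copies and parses the user buffer in one step and returns 0 or a negative errno. A minimal sketch of the resulting write-handler shape; example_knob and example_knob_write() are illustrative names, not part of the patch:

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/uaccess.h>

static unsigned long example_knob;      /* hypothetical boolean tunable */

static ssize_t
example_knob_write(struct file *filp, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
{
        unsigned long val;
        int ret;

        /* Copy and parse a base-10 value straight from user memory. */
        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;

        example_knob = !!val;
        *ppos += cnt;
        return cnt;
}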
@@ -1182,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) | |||
1182 | return NULL; | 1178 | return NULL; |
1183 | } | 1179 | } |
1184 | 1180 | ||
1181 | static void | ||
1182 | ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash); | ||
1183 | static void | ||
1184 | ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash); | ||
1185 | |||
1185 | static int | 1186 | static int |
1186 | ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | 1187 | ftrace_hash_move(struct ftrace_ops *ops, int enable, |
1188 | struct ftrace_hash **dst, struct ftrace_hash *src) | ||
1187 | { | 1189 | { |
1188 | struct ftrace_func_entry *entry; | 1190 | struct ftrace_func_entry *entry; |
1189 | struct hlist_node *tp, *tn; | 1191 | struct hlist_node *tp, *tn; |
@@ -1193,9 +1195,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | |||
1193 | unsigned long key; | 1195 | unsigned long key; |
1194 | int size = src->count; | 1196 | int size = src->count; |
1195 | int bits = 0; | 1197 | int bits = 0; |
1198 | int ret; | ||
1196 | int i; | 1199 | int i; |
1197 | 1200 | ||
1198 | /* | 1201 | /* |
1202 | * Remove the current set, update the hash and add | ||
1203 | * them back. | ||
1204 | */ | ||
1205 | ftrace_hash_rec_disable(ops, enable); | ||
1206 | |||
1207 | /* | ||
1199 | * If the new source is empty, just free dst and assign it | 1208 | * If the new source is empty, just free dst and assign it |
1200 | * the empty_hash. | 1209 | * the empty_hash. |
1201 | */ | 1210 | */ |
@@ -1215,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | |||
1215 | if (bits > FTRACE_HASH_MAX_BITS) | 1224 | if (bits > FTRACE_HASH_MAX_BITS) |
1216 | bits = FTRACE_HASH_MAX_BITS; | 1225 | bits = FTRACE_HASH_MAX_BITS; |
1217 | 1226 | ||
1227 | ret = -ENOMEM; | ||
1218 | new_hash = alloc_ftrace_hash(bits); | 1228 | new_hash = alloc_ftrace_hash(bits); |
1219 | if (!new_hash) | 1229 | if (!new_hash) |
1220 | return -ENOMEM; | 1230 | goto out; |
1221 | 1231 | ||
1222 | size = 1 << src->size_bits; | 1232 | size = 1 << src->size_bits; |
1223 | for (i = 0; i < size; i++) { | 1233 | for (i = 0; i < size; i++) { |
@@ -1236,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | |||
1236 | rcu_assign_pointer(*dst, new_hash); | 1246 | rcu_assign_pointer(*dst, new_hash); |
1237 | free_ftrace_hash_rcu(old_hash); | 1247 | free_ftrace_hash_rcu(old_hash); |
1238 | 1248 | ||
1239 | return 0; | 1249 | ret = 0; |
1250 | out: | ||
1251 | /* | ||
1252 | * Enable regardless of ret: | ||
1253 | * On success, we enable the new hash. | ||
1254 | * On failure, we re-enable the original hash. | ||
1255 | */ | ||
1256 | ftrace_hash_rec_enable(ops, enable); | ||
1257 | |||
1258 | return ret; | ||
1240 | } | 1259 | } |
1241 | 1260 | ||
1242 | /* | 1261 | /* |
@@ -1596,6 +1615,12 @@ static int __ftrace_modify_code(void *data) | |||
1596 | { | 1615 | { |
1597 | int *command = data; | 1616 | int *command = data; |
1598 | 1617 | ||
1618 | /* | ||
1619 | * Do not call function tracer while we update the code. | ||
1620 | * We are in stop machine, no worrying about races. | ||
1621 | */ | ||
1622 | function_trace_stop++; | ||
1623 | |||
1599 | if (*command & FTRACE_ENABLE_CALLS) | 1624 | if (*command & FTRACE_ENABLE_CALLS) |
1600 | ftrace_replace_code(1); | 1625 | ftrace_replace_code(1); |
1601 | else if (*command & FTRACE_DISABLE_CALLS) | 1626 | else if (*command & FTRACE_DISABLE_CALLS) |
@@ -1609,6 +1634,18 @@ static int __ftrace_modify_code(void *data) | |||
1609 | else if (*command & FTRACE_STOP_FUNC_RET) | 1634 | else if (*command & FTRACE_STOP_FUNC_RET) |
1610 | ftrace_disable_ftrace_graph_caller(); | 1635 | ftrace_disable_ftrace_graph_caller(); |
1611 | 1636 | ||
1637 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
1638 | /* | ||
1639 | * For archs that call ftrace_test_stop_func(), we must | ||
1640 | * wait till after we update all the function callers | ||
1641 | * before we update the callback. This keeps different | ||
1642 | * ops that record different functions from corrupting | ||
1643 | * each other. | ||
1644 | */ | ||
1645 | __ftrace_trace_function = __ftrace_trace_function_delay; | ||
1646 | #endif | ||
1647 | function_trace_stop--; | ||
1648 | |||
1612 | return 0; | 1649 | return 0; |
1613 | } | 1650 | } |
1614 | 1651 | ||
@@ -1744,10 +1781,36 @@ static cycle_t ftrace_update_time; | |||
1744 | static unsigned long ftrace_update_cnt; | 1781 | static unsigned long ftrace_update_cnt; |
1745 | unsigned long ftrace_update_tot_cnt; | 1782 | unsigned long ftrace_update_tot_cnt; |
1746 | 1783 | ||
1784 | static int ops_traces_mod(struct ftrace_ops *ops) | ||
1785 | { | ||
1786 | struct ftrace_hash *hash; | ||
1787 | |||
1788 | hash = ops->filter_hash; | ||
1789 | return !!(!hash || !hash->count); | ||
1790 | } | ||
1791 | |||
1747 | static int ftrace_update_code(struct module *mod) | 1792 | static int ftrace_update_code(struct module *mod) |
1748 | { | 1793 | { |
1749 | struct dyn_ftrace *p; | 1794 | struct dyn_ftrace *p; |
1750 | cycle_t start, stop; | 1795 | cycle_t start, stop; |
1796 | unsigned long ref = 0; | ||
1797 | |||
1798 | /* | ||
1799 | * When adding a module, we need to check if tracers are | ||
1800 | * currently enabled and if they are set to trace all functions. | ||
1801 | * If they are, we need to enable the module functions as well | ||
1802 | * as update the reference counts for those function records. | ||
1803 | */ | ||
1804 | if (mod) { | ||
1805 | struct ftrace_ops *ops; | ||
1806 | |||
1807 | for (ops = ftrace_ops_list; | ||
1808 | ops != &ftrace_list_end; ops = ops->next) { | ||
1809 | if (ops->flags & FTRACE_OPS_FL_ENABLED && | ||
1810 | ops_traces_mod(ops)) | ||
1811 | ref++; | ||
1812 | } | ||
1813 | } | ||
1751 | 1814 | ||
1752 | start = ftrace_now(raw_smp_processor_id()); | 1815 | start = ftrace_now(raw_smp_processor_id()); |
1753 | ftrace_update_cnt = 0; | 1816 | ftrace_update_cnt = 0; |
@@ -1760,7 +1823,7 @@ static int ftrace_update_code(struct module *mod) | |||
1760 | 1823 | ||
1761 | p = ftrace_new_addrs; | 1824 | p = ftrace_new_addrs; |
1762 | ftrace_new_addrs = p->newlist; | 1825 | ftrace_new_addrs = p->newlist; |
1763 | p->flags = 0L; | 1826 | p->flags = ref; |
1764 | 1827 | ||
1765 | /* | 1828 | /* |
1766 | * Do the initial record conversion from mcount jump | 1829 | * Do the initial record conversion from mcount jump |
@@ -1783,7 +1846,7 @@ static int ftrace_update_code(struct module *mod) | |||
1783 | * conversion puts the module to the correct state, thus | 1846 | * conversion puts the module to the correct state, thus |
1784 | * passing the ftrace_make_call check. | 1847 | * passing the ftrace_make_call check. |
1785 | */ | 1848 | */ |
1786 | if (ftrace_start_up) { | 1849 | if (ftrace_start_up && ref) { |
1787 | int failed = __ftrace_replace_code(p, 1); | 1850 | int failed = __ftrace_replace_code(p, 1); |
1788 | if (failed) { | 1851 | if (failed) { |
1789 | ftrace_bug(failed, p->ip); | 1852 | ftrace_bug(failed, p->ip); |
@@ -2407,10 +2470,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) | |||
2407 | */ | 2470 | */ |
2408 | 2471 | ||
2409 | static int | 2472 | static int |
2410 | ftrace_mod_callback(char *func, char *cmd, char *param, int enable) | 2473 | ftrace_mod_callback(struct ftrace_hash *hash, |
2474 | char *func, char *cmd, char *param, int enable) | ||
2411 | { | 2475 | { |
2412 | struct ftrace_ops *ops = &global_ops; | ||
2413 | struct ftrace_hash *hash; | ||
2414 | char *mod; | 2476 | char *mod; |
2415 | int ret = -EINVAL; | 2477 | int ret = -EINVAL; |
2416 | 2478 | ||
@@ -2430,11 +2492,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) | |||
2430 | if (!strlen(mod)) | 2492 | if (!strlen(mod)) |
2431 | return ret; | 2493 | return ret; |
2432 | 2494 | ||
2433 | if (enable) | ||
2434 | hash = ops->filter_hash; | ||
2435 | else | ||
2436 | hash = ops->notrace_hash; | ||
2437 | |||
2438 | ret = ftrace_match_module_records(hash, func, mod); | 2495 | ret = ftrace_match_module_records(hash, func, mod); |
2439 | if (!ret) | 2496 | if (!ret) |
2440 | ret = -EINVAL; | 2497 | ret = -EINVAL; |
@@ -2760,7 +2817,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash, | |||
2760 | mutex_lock(&ftrace_cmd_mutex); | 2817 | mutex_lock(&ftrace_cmd_mutex); |
2761 | list_for_each_entry(p, &ftrace_commands, list) { | 2818 | list_for_each_entry(p, &ftrace_commands, list) { |
2762 | if (strcmp(p->name, command) == 0) { | 2819 | if (strcmp(p->name, command) == 0) { |
2763 | ret = p->func(func, command, next, enable); | 2820 | ret = p->func(hash, func, command, next, enable); |
2764 | goto out_unlock; | 2821 | goto out_unlock; |
2765 | } | 2822 | } |
2766 | } | 2823 | } |
@@ -2857,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
2857 | ftrace_match_records(hash, buf, len); | 2914 | ftrace_match_records(hash, buf, len); |
2858 | 2915 | ||
2859 | mutex_lock(&ftrace_lock); | 2916 | mutex_lock(&ftrace_lock); |
2860 | ret = ftrace_hash_move(orig_hash, hash); | 2917 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
2918 | if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED | ||
2919 | && ftrace_enabled) | ||
2920 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | ||
2921 | |||
2861 | mutex_unlock(&ftrace_lock); | 2922 | mutex_unlock(&ftrace_lock); |
2862 | 2923 | ||
2863 | mutex_unlock(&ftrace_regex_lock); | 2924 | mutex_unlock(&ftrace_regex_lock); |
@@ -3040,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file) | |||
3040 | orig_hash = &iter->ops->notrace_hash; | 3101 | orig_hash = &iter->ops->notrace_hash; |
3041 | 3102 | ||
3042 | mutex_lock(&ftrace_lock); | 3103 | mutex_lock(&ftrace_lock); |
3043 | /* | 3104 | ret = ftrace_hash_move(iter->ops, filter_hash, |
3044 | * Remove the current set, update the hash and add | 3105 | orig_hash, iter->hash); |
3045 | * them back. | 3106 | if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) |
3046 | */ | 3107 | && ftrace_enabled) |
3047 | ftrace_hash_rec_disable(iter->ops, filter_hash); | 3108 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
3048 | ret = ftrace_hash_move(orig_hash, iter->hash); | 3109 | |
3049 | if (!ret) { | ||
3050 | ftrace_hash_rec_enable(iter->ops, filter_hash); | ||
3051 | if (iter->ops->flags & FTRACE_OPS_FL_ENABLED | ||
3052 | && ftrace_enabled) | ||
3053 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | ||
3054 | } | ||
3055 | mutex_unlock(&ftrace_lock); | 3110 | mutex_unlock(&ftrace_lock); |
3056 | } | 3111 | } |
3057 | free_ftrace_hash(iter->hash); | 3112 | free_ftrace_hash(iter->hash); |
@@ -3330,7 +3385,7 @@ static int ftrace_process_locs(struct module *mod, | |||
3330 | { | 3385 | { |
3331 | unsigned long *p; | 3386 | unsigned long *p; |
3332 | unsigned long addr; | 3387 | unsigned long addr; |
3333 | unsigned long flags; | 3388 | unsigned long flags = 0; /* Shut up gcc */ |
3334 | 3389 | ||
3335 | mutex_lock(&ftrace_lock); | 3390 | mutex_lock(&ftrace_lock); |
3336 | p = start; | 3391 | p = start; |
@@ -3348,12 +3403,18 @@ static int ftrace_process_locs(struct module *mod, | |||
3348 | } | 3403 | } |
3349 | 3404 | ||
3350 | /* | 3405 | /* |
3351 | * Disable interrupts to prevent interrupts from executing | 3406 | * We only need to disable interrupts on start up |
3352 | * code that is being modified. | 3407 | * because we are modifying code that an interrupt |
3408 | * may execute, and the modification is not atomic. | ||
3409 | * But for modules, nothing runs the code we modify | ||
3410 | * until we are finished with it, and there's no | ||
3411 | * reason to cause large interrupt latencies while we do it. | ||
3353 | */ | 3412 | */ |
3354 | local_irq_save(flags); | 3413 | if (!mod) |
3414 | local_irq_save(flags); | ||
3355 | ftrace_update_code(mod); | 3415 | ftrace_update_code(mod); |
3356 | local_irq_restore(flags); | 3416 | if (!mod) |
3417 | local_irq_restore(flags); | ||
3357 | mutex_unlock(&ftrace_lock); | 3418 | mutex_unlock(&ftrace_lock); |
3358 | 3419 | ||
3359 | return 0; | 3420 | return 0; |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b0c7aa407943..731201bf4acc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
997 | unsigned nr_pages) | 997 | unsigned nr_pages) |
998 | { | 998 | { |
999 | struct buffer_page *bpage, *tmp; | 999 | struct buffer_page *bpage, *tmp; |
1000 | unsigned long addr; | ||
1001 | LIST_HEAD(pages); | 1000 | LIST_HEAD(pages); |
1002 | unsigned i; | 1001 | unsigned i; |
1003 | 1002 | ||
1004 | WARN_ON(!nr_pages); | 1003 | WARN_ON(!nr_pages); |
1005 | 1004 | ||
1006 | for (i = 0; i < nr_pages; i++) { | 1005 | for (i = 0; i < nr_pages; i++) { |
1006 | struct page *page; | ||
1007 | /* | ||
1008 | * __GFP_NORETRY flag makes sure that the allocation fails | ||
1009 | * gracefully without invoking oom-killer and the system is | ||
1010 | * not destabilized. | ||
1011 | */ | ||
1007 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1012 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
1008 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 1013 | GFP_KERNEL | __GFP_NORETRY, |
1014 | cpu_to_node(cpu_buffer->cpu)); | ||
1009 | if (!bpage) | 1015 | if (!bpage) |
1010 | goto free_pages; | 1016 | goto free_pages; |
1011 | 1017 | ||
@@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
1013 | 1019 | ||
1014 | list_add(&bpage->list, &pages); | 1020 | list_add(&bpage->list, &pages); |
1015 | 1021 | ||
1016 | addr = __get_free_page(GFP_KERNEL); | 1022 | page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), |
1017 | if (!addr) | 1023 | GFP_KERNEL | __GFP_NORETRY, 0); |
1024 | if (!page) | ||
1018 | goto free_pages; | 1025 | goto free_pages; |
1019 | bpage->page = (void *)addr; | 1026 | bpage->page = page_address(page); |
1020 | rb_init_page(bpage->page); | 1027 | rb_init_page(bpage->page); |
1021 | } | 1028 | } |
1022 | 1029 | ||
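Both allocation paths above move from __get_free_page() to alloc_pages_node(), so ring-buffer pages come from the NUMA node of the CPU that owns them, and add __GFP_NORETRY so a large allocation fails cleanly instead of pushing the system toward the OOM killer. A sketch of that allocation pattern (the example_* helper names are illustrative):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/topology.h>

/*
 * Allocate one page close to 'cpu'; returns its kernel virtual
 * address, or NULL if memory is tight (no OOM-killer involvement).
 */
static void *example_alloc_buffer_page(int cpu)
{
        struct page *page;

        page = alloc_pages_node(cpu_to_node(cpu),
                                GFP_KERNEL | __GFP_NORETRY, 0);
        if (!page)
                return NULL;

        return page_address(page);
}

static void example_free_buffer_page(void *addr)
{
        free_pages((unsigned long)addr, 0);
}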
@@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
1045 | { | 1052 | { |
1046 | struct ring_buffer_per_cpu *cpu_buffer; | 1053 | struct ring_buffer_per_cpu *cpu_buffer; |
1047 | struct buffer_page *bpage; | 1054 | struct buffer_page *bpage; |
1048 | unsigned long addr; | 1055 | struct page *page; |
1049 | int ret; | 1056 | int ret; |
1050 | 1057 | ||
1051 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), | 1058 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), |
@@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
1067 | rb_check_bpage(cpu_buffer, bpage); | 1074 | rb_check_bpage(cpu_buffer, bpage); |
1068 | 1075 | ||
1069 | cpu_buffer->reader_page = bpage; | 1076 | cpu_buffer->reader_page = bpage; |
1070 | addr = __get_free_page(GFP_KERNEL); | 1077 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); |
1071 | if (!addr) | 1078 | if (!page) |
1072 | goto fail_free_reader; | 1079 | goto fail_free_reader; |
1073 | bpage->page = (void *)addr; | 1080 | bpage->page = page_address(page); |
1074 | rb_init_page(bpage->page); | 1081 | rb_init_page(bpage->page); |
1075 | 1082 | ||
1076 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 1083 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
@@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
1314 | unsigned nr_pages, rm_pages, new_pages; | 1321 | unsigned nr_pages, rm_pages, new_pages; |
1315 | struct buffer_page *bpage, *tmp; | 1322 | struct buffer_page *bpage, *tmp; |
1316 | unsigned long buffer_size; | 1323 | unsigned long buffer_size; |
1317 | unsigned long addr; | ||
1318 | LIST_HEAD(pages); | 1324 | LIST_HEAD(pages); |
1319 | int i, cpu; | 1325 | int i, cpu; |
1320 | 1326 | ||
@@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
1375 | 1381 | ||
1376 | for_each_buffer_cpu(buffer, cpu) { | 1382 | for_each_buffer_cpu(buffer, cpu) { |
1377 | for (i = 0; i < new_pages; i++) { | 1383 | for (i = 0; i < new_pages; i++) { |
1384 | struct page *page; | ||
1385 | /* | ||
1386 | * __GFP_NORETRY flag makes sure that the allocation | ||
1387 | * fails gracefully without invoking oom-killer and | ||
1388 | * the system is not destabilized. | ||
1389 | */ | ||
1378 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), | 1390 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), |
1379 | cache_line_size()), | 1391 | cache_line_size()), |
1380 | GFP_KERNEL, cpu_to_node(cpu)); | 1392 | GFP_KERNEL | __GFP_NORETRY, |
1393 | cpu_to_node(cpu)); | ||
1381 | if (!bpage) | 1394 | if (!bpage) |
1382 | goto free_pages; | 1395 | goto free_pages; |
1383 | list_add(&bpage->list, &pages); | 1396 | list_add(&bpage->list, &pages); |
1384 | addr = __get_free_page(GFP_KERNEL); | 1397 | page = alloc_pages_node(cpu_to_node(cpu), |
1385 | if (!addr) | 1398 | GFP_KERNEL | __GFP_NORETRY, 0); |
1399 | if (!page) | ||
1386 | goto free_pages; | 1400 | goto free_pages; |
1387 | bpage->page = (void *)addr; | 1401 | bpage->page = page_address(page); |
1388 | rb_init_page(bpage->page); | 1402 | rb_init_page(bpage->page); |
1389 | } | 1403 | } |
1390 | } | 1404 | } |
@@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | |||
3730 | * Returns: | 3744 | * Returns: |
3731 | * The page allocated, or NULL on error. | 3745 | * The page allocated, or NULL on error. |
3732 | */ | 3746 | */ |
3733 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | 3747 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) |
3734 | { | 3748 | { |
3735 | struct buffer_data_page *bpage; | 3749 | struct buffer_data_page *bpage; |
3736 | unsigned long addr; | 3750 | struct page *page; |
3737 | 3751 | ||
3738 | addr = __get_free_page(GFP_KERNEL); | 3752 | page = alloc_pages_node(cpu_to_node(cpu), |
3739 | if (!addr) | 3753 | GFP_KERNEL | __GFP_NORETRY, 0); |
3754 | if (!page) | ||
3740 | return NULL; | 3755 | return NULL; |
3741 | 3756 | ||
3742 | bpage = (void *)addr; | 3757 | bpage = page_address(page); |
3743 | 3758 | ||
3744 | rb_init_page(bpage); | 3759 | rb_init_page(bpage); |
3745 | 3760 | ||
@@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
3978 | size_t cnt, loff_t *ppos) | 3993 | size_t cnt, loff_t *ppos) |
3979 | { | 3994 | { |
3980 | unsigned long *p = filp->private_data; | 3995 | unsigned long *p = filp->private_data; |
3981 | char buf[64]; | ||
3982 | unsigned long val; | 3996 | unsigned long val; |
3983 | int ret; | 3997 | int ret; |
3984 | 3998 | ||
3985 | if (cnt >= sizeof(buf)) | 3999 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
3986 | return -EINVAL; | 4000 | if (ret) |
3987 | |||
3988 | if (copy_from_user(&buf, ubuf, cnt)) | ||
3989 | return -EFAULT; | ||
3990 | |||
3991 | buf[cnt] = 0; | ||
3992 | |||
3993 | ret = strict_strtoul(buf, 10, &val); | ||
3994 | if (ret < 0) | ||
3995 | return ret; | 4001 | return ret; |
3996 | 4002 | ||
3997 | if (val) | 4003 | if (val) |
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 302f8a614635..a5457d577b98 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu) | |||
106 | int inc; | 106 | int inc; |
107 | int i; | 107 | int i; |
108 | 108 | ||
109 | bpage = ring_buffer_alloc_read_page(buffer); | 109 | bpage = ring_buffer_alloc_read_page(buffer, cpu); |
110 | if (!bpage) | 110 | if (!bpage) |
111 | return EVENT_DROPPED; | 111 | return EVENT_DROPPED; |
112 | 112 | ||
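With ring_buffer_alloc_read_page() now taking a cpu argument (so the spare page lands on that CPU's node), a reader's alloc/read/free cycle looks roughly like the sketch below. Error handling is trimmed and example_read_one_page() is an illustrative wrapper, not an API introduced by the patch:

#include <linux/ring_buffer.h>
#include <linux/errno.h>
#include <linux/mm.h>

static int example_read_one_page(struct ring_buffer *buffer, int cpu)
{
        void *page;
        int ret;

        page = ring_buffer_alloc_read_page(buffer, cpu);
        if (!page)
                return -ENOMEM;

        /*
         * Last argument 0: accept a partial page instead of insisting
         * on a full one; a negative return means nothing was copied.
         */
        ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0);

        ring_buffer_free_read_page(buffer, page);
        return ret < 0 ? -EAGAIN : 0;
}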
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee9c921d7f21..e5df02c69b1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
343 | static int trace_stop_count; | 343 | static int trace_stop_count; |
344 | static DEFINE_SPINLOCK(tracing_start_lock); | 344 | static DEFINE_SPINLOCK(tracing_start_lock); |
345 | 345 | ||
346 | static void wakeup_work_handler(struct work_struct *work) | ||
347 | { | ||
348 | wake_up(&trace_wait); | ||
349 | } | ||
350 | |||
351 | static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); | ||
352 | |||
346 | /** | 353 | /** |
347 | * trace_wake_up - wake up tasks waiting for trace input | 354 | * trace_wake_up - wake up tasks waiting for trace input |
348 | * | 355 | * |
349 | * Simply wakes up any task that is blocked on the trace_wait | 356 | * Schedules a delayed work to wake up any task that is blocked on the |
350 | * queue. These is used with trace_poll for tasks polling the trace. | 357 | * trace_wait queue. This is used with trace_poll for tasks polling the |
358 | * trace. | ||
351 | */ | 359 | */ |
352 | void trace_wake_up(void) | 360 | void trace_wake_up(void) |
353 | { | 361 | { |
354 | int cpu; | 362 | const unsigned long delay = msecs_to_jiffies(2); |
355 | 363 | ||
356 | if (trace_flags & TRACE_ITER_BLOCK) | 364 | if (trace_flags & TRACE_ITER_BLOCK) |
357 | return; | 365 | return; |
358 | /* | 366 | schedule_delayed_work(&wakeup_work, delay); |
359 | * The runqueue_is_locked() can fail, but this is the best we | ||
360 | * have for now: | ||
361 | */ | ||
362 | cpu = get_cpu(); | ||
363 | if (!runqueue_is_locked(cpu)) | ||
364 | wake_up(&trace_wait); | ||
365 | put_cpu(); | ||
366 | } | 367 | } |
367 | 368 | ||
368 | static int __init set_buf_size(char *str) | 369 | static int __init set_buf_size(char *str) |
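The old trace_wake_up() had to peek at runqueue_is_locked() because calling wake_up() from scheduler context could deadlock on the runqueue lock; deferring the wake-up to a delayed work item removes that dependency entirely, at the cost of roughly 2 ms of latency for pollers. The general shape of the pattern, using invented names (example_wait, example_poke_readers()):

#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wait);

static void example_wakeup_handler(struct work_struct *work)
{
        wake_up(&example_wait);         /* runs later, from a worker thread */
}

static DECLARE_DELAYED_WORK(example_wakeup_work, example_wakeup_handler);

/*
 * Safe from contexts where a direct wake_up() would be risky:
 * the actual wake-up happens once the work item fires.
 */
static void example_poke_readers(void)
{
        schedule_delayed_work(&example_wakeup_work, msecs_to_jiffies(2));
}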
@@ -424,6 +425,7 @@ static const char *trace_options[] = { | |||
424 | "graph-time", | 425 | "graph-time", |
425 | "record-cmd", | 426 | "record-cmd", |
426 | "overwrite", | 427 | "overwrite", |
428 | "disable_on_free", | ||
427 | NULL | 429 | NULL |
428 | }; | 430 | }; |
429 | 431 | ||
@@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, | |||
1191 | } | 1193 | } |
1192 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); | 1194 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); |
1193 | 1195 | ||
1196 | void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, | ||
1197 | struct ring_buffer_event *event, | ||
1198 | unsigned long flags, int pc, | ||
1199 | struct pt_regs *regs) | ||
1200 | { | ||
1201 | ring_buffer_unlock_commit(buffer, event); | ||
1202 | |||
1203 | ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); | ||
1204 | ftrace_trace_userstack(buffer, flags, pc); | ||
1205 | } | ||
1206 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); | ||
1207 | |||
1194 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, | 1208 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, |
1195 | struct ring_buffer_event *event) | 1209 | struct ring_buffer_event *event) |
1196 | { | 1210 | { |
@@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, | |||
1234 | } | 1248 | } |
1235 | 1249 | ||
1236 | #ifdef CONFIG_STACKTRACE | 1250 | #ifdef CONFIG_STACKTRACE |
1251 | |||
1252 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) | ||
1253 | struct ftrace_stack { | ||
1254 | unsigned long calls[FTRACE_STACK_MAX_ENTRIES]; | ||
1255 | }; | ||
1256 | |||
1257 | static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); | ||
1258 | static DEFINE_PER_CPU(int, ftrace_stack_reserve); | ||
1259 | |||
1237 | static void __ftrace_trace_stack(struct ring_buffer *buffer, | 1260 | static void __ftrace_trace_stack(struct ring_buffer *buffer, |
1238 | unsigned long flags, | 1261 | unsigned long flags, |
1239 | int skip, int pc) | 1262 | int skip, int pc, struct pt_regs *regs) |
1240 | { | 1263 | { |
1241 | struct ftrace_event_call *call = &event_kernel_stack; | 1264 | struct ftrace_event_call *call = &event_kernel_stack; |
1242 | struct ring_buffer_event *event; | 1265 | struct ring_buffer_event *event; |
1243 | struct stack_entry *entry; | 1266 | struct stack_entry *entry; |
1244 | struct stack_trace trace; | 1267 | struct stack_trace trace; |
1268 | int use_stack; | ||
1269 | int size = FTRACE_STACK_ENTRIES; | ||
1270 | |||
1271 | trace.nr_entries = 0; | ||
1272 | trace.skip = skip; | ||
1273 | |||
1274 | /* | ||
1275 | * Since events can happen in NMIs there's no safe way to | ||
1276 | * use the per cpu ftrace_stacks. We reserve it and if an interrupt | ||
1277 | * or NMI comes in, it will just have to use the default | ||
1278 | * FTRACE_STACK_SIZE. | ||
1279 | */ | ||
1280 | preempt_disable_notrace(); | ||
1281 | |||
1282 | use_stack = ++__get_cpu_var(ftrace_stack_reserve); | ||
1283 | /* | ||
1284 | * We don't need any atomic variables, just a barrier. | ||
1285 | * If an interrupt comes in, we don't care, because it would | ||
1286 | * have exited and put the counter back to what we want. | ||
1287 | * We just need a barrier to keep gcc from moving things | ||
1288 | * around. | ||
1289 | */ | ||
1290 | barrier(); | ||
1291 | if (use_stack == 1) { | ||
1292 | trace.entries = &__get_cpu_var(ftrace_stack).calls[0]; | ||
1293 | trace.max_entries = FTRACE_STACK_MAX_ENTRIES; | ||
1294 | |||
1295 | if (regs) | ||
1296 | save_stack_trace_regs(regs, &trace); | ||
1297 | else | ||
1298 | save_stack_trace(&trace); | ||
1299 | |||
1300 | if (trace.nr_entries > size) | ||
1301 | size = trace.nr_entries; | ||
1302 | } else | ||
1303 | /* From now on, use_stack is a boolean */ | ||
1304 | use_stack = 0; | ||
1305 | |||
1306 | size *= sizeof(unsigned long); | ||
1245 | 1307 | ||
1246 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, | 1308 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, |
1247 | sizeof(*entry), flags, pc); | 1309 | sizeof(*entry) + size, flags, pc); |
1248 | if (!event) | 1310 | if (!event) |
1249 | return; | 1311 | goto out; |
1250 | entry = ring_buffer_event_data(event); | 1312 | entry = ring_buffer_event_data(event); |
1251 | memset(&entry->caller, 0, sizeof(entry->caller)); | ||
1252 | 1313 | ||
1253 | trace.nr_entries = 0; | 1314 | memset(&entry->caller, 0, size); |
1254 | trace.max_entries = FTRACE_STACK_ENTRIES; | 1315 | |
1255 | trace.skip = skip; | 1316 | if (use_stack) |
1256 | trace.entries = entry->caller; | 1317 | memcpy(&entry->caller, trace.entries, |
1318 | trace.nr_entries * sizeof(unsigned long)); | ||
1319 | else { | ||
1320 | trace.max_entries = FTRACE_STACK_ENTRIES; | ||
1321 | trace.entries = entry->caller; | ||
1322 | if (regs) | ||
1323 | save_stack_trace_regs(regs, &trace); | ||
1324 | else | ||
1325 | save_stack_trace(&trace); | ||
1326 | } | ||
1327 | |||
1328 | entry->size = trace.nr_entries; | ||
1257 | 1329 | ||
1258 | save_stack_trace(&trace); | ||
1259 | if (!filter_check_discard(call, entry, buffer, event)) | 1330 | if (!filter_check_discard(call, entry, buffer, event)) |
1260 | ring_buffer_unlock_commit(buffer, event); | 1331 | ring_buffer_unlock_commit(buffer, event); |
1332 | |||
1333 | out: | ||
1334 | /* Again, don't let gcc optimize things here */ | ||
1335 | barrier(); | ||
1336 | __get_cpu_var(ftrace_stack_reserve)--; | ||
1337 | preempt_enable_notrace(); | ||
1338 | |||
1339 | } | ||
1340 | |||
1341 | void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, | ||
1342 | int skip, int pc, struct pt_regs *regs) | ||
1343 | { | ||
1344 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | ||
1345 | return; | ||
1346 | |||
1347 | __ftrace_trace_stack(buffer, flags, skip, pc, regs); | ||
1261 | } | 1348 | } |
1262 | 1349 | ||
1263 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | 1350 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
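The reworked __ftrace_trace_stack() reserves a large per-cpu scratch buffer with nothing more than a preempt-disabled counter and a compiler barrier: only an interrupt or NMI on the same CPU can race with the owner, and it always restores the counter before returning, so nested users fall back to a small local buffer. A stripped-down sketch of that reservation idiom; all example_* names and the buffer sizes are illustrative:

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/compiler.h>

#define EXAMPLE_BIG_SLOTS       1024
#define EXAMPLE_SMALL_SLOTS     8

struct example_scratch {
        unsigned long slots[EXAMPLE_BIG_SLOTS];
};

static DEFINE_PER_CPU(struct example_scratch, example_scratch);
static DEFINE_PER_CPU(int, example_scratch_reserve);

static void example_capture(void)
{
        unsigned long small_buf[EXAMPLE_SMALL_SLOTS];
        unsigned long *buf;
        int depth;

        preempt_disable_notrace();

        /*
         * First user on this CPU gets the big per-cpu buffer; anyone
         * nesting (an irq or NMI landing in the middle) falls back to
         * the small on-stack one.  barrier() only keeps the compiler
         * from reordering the increment -- interrupts on this CPU are
         * the only racers and they restore the count before returning.
         */
        depth = ++__get_cpu_var(example_scratch_reserve);
        barrier();

        buf = (depth == 1) ? __get_cpu_var(example_scratch).slots : small_buf;

        buf[0] = depth;                 /* stand-in for the real payload */

        barrier();
        __get_cpu_var(example_scratch_reserve)--;
        preempt_enable_notrace();
}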
@@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | |||
1266 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | 1353 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) |
1267 | return; | 1354 | return; |
1268 | 1355 | ||
1269 | __ftrace_trace_stack(buffer, flags, skip, pc); | 1356 | __ftrace_trace_stack(buffer, flags, skip, pc, NULL); |
1270 | } | 1357 | } |
1271 | 1358 | ||
1272 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, | 1359 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
1273 | int pc) | 1360 | int pc) |
1274 | { | 1361 | { |
1275 | __ftrace_trace_stack(tr->buffer, flags, skip, pc); | 1362 | __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); |
1276 | } | 1363 | } |
1277 | 1364 | ||
1278 | /** | 1365 | /** |
@@ -1288,7 +1375,7 @@ void trace_dump_stack(void) | |||
1288 | local_save_flags(flags); | 1375 | local_save_flags(flags); |
1289 | 1376 | ||
1290 | /* skipping 3 traces, seems to get us at the caller of this function */ | 1377 | /* skipping 3 traces, seems to get us at the caller of this function */ |
1291 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); | 1378 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); |
1292 | } | 1379 | } |
1293 | 1380 | ||
1294 | static DEFINE_PER_CPU(int, user_stack_count); | 1381 | static DEFINE_PER_CPU(int, user_stack_count); |
@@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, | |||
1536 | 1623 | ||
1537 | ftrace_enable_cpu(); | 1624 | ftrace_enable_cpu(); |
1538 | 1625 | ||
1539 | return event ? ring_buffer_event_data(event) : NULL; | 1626 | if (event) { |
1627 | iter->ent_size = ring_buffer_event_length(event); | ||
1628 | return ring_buffer_event_data(event); | ||
1629 | } | ||
1630 | iter->ent_size = 0; | ||
1631 | return NULL; | ||
1540 | } | 1632 | } |
1541 | 1633 | ||
1542 | static struct trace_entry * | 1634 | static struct trace_entry * |
@@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m) | |||
2051 | { | 2143 | { |
2052 | struct trace_iterator *iter = m->private; | 2144 | struct trace_iterator *iter = m->private; |
2053 | 2145 | ||
2146 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
2147 | return; | ||
2148 | |||
2054 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | 2149 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { |
2055 | /* print nothing if the buffers are empty */ | 2150 | /* print nothing if the buffers are empty */ |
2056 | if (trace_empty(iter)) | 2151 | if (trace_empty(iter)) |
@@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, | |||
2701 | size_t cnt, loff_t *ppos) | 2796 | size_t cnt, loff_t *ppos) |
2702 | { | 2797 | { |
2703 | struct trace_array *tr = filp->private_data; | 2798 | struct trace_array *tr = filp->private_data; |
2704 | char buf[64]; | ||
2705 | unsigned long val; | 2799 | unsigned long val; |
2706 | int ret; | 2800 | int ret; |
2707 | 2801 | ||
2708 | if (cnt >= sizeof(buf)) | 2802 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
2709 | return -EINVAL; | 2803 | if (ret) |
2710 | |||
2711 | if (copy_from_user(&buf, ubuf, cnt)) | ||
2712 | return -EFAULT; | ||
2713 | |||
2714 | buf[cnt] = 0; | ||
2715 | |||
2716 | ret = strict_strtoul(buf, 10, &val); | ||
2717 | if (ret < 0) | ||
2718 | return ret; | 2804 | return ret; |
2719 | 2805 | ||
2720 | val = !!val; | 2806 | val = !!val; |
@@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr) | |||
2767 | return t->init(tr); | 2853 | return t->init(tr); |
2768 | } | 2854 | } |
2769 | 2855 | ||
2770 | static int tracing_resize_ring_buffer(unsigned long size) | 2856 | static int __tracing_resize_ring_buffer(unsigned long size) |
2771 | { | 2857 | { |
2772 | int ret; | 2858 | int ret; |
2773 | 2859 | ||
@@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size) | |||
2819 | return ret; | 2905 | return ret; |
2820 | } | 2906 | } |
2821 | 2907 | ||
2908 | static ssize_t tracing_resize_ring_buffer(unsigned long size) | ||
2909 | { | ||
2910 | int cpu, ret = size; | ||
2911 | |||
2912 | mutex_lock(&trace_types_lock); | ||
2913 | |||
2914 | tracing_stop(); | ||
2915 | |||
2916 | /* disable all cpu buffers */ | ||
2917 | for_each_tracing_cpu(cpu) { | ||
2918 | if (global_trace.data[cpu]) | ||
2919 | atomic_inc(&global_trace.data[cpu]->disabled); | ||
2920 | if (max_tr.data[cpu]) | ||
2921 | atomic_inc(&max_tr.data[cpu]->disabled); | ||
2922 | } | ||
2923 | |||
2924 | if (size != global_trace.entries) | ||
2925 | ret = __tracing_resize_ring_buffer(size); | ||
2926 | |||
2927 | if (ret < 0) | ||
2928 | ret = -ENOMEM; | ||
2929 | |||
2930 | for_each_tracing_cpu(cpu) { | ||
2931 | if (global_trace.data[cpu]) | ||
2932 | atomic_dec(&global_trace.data[cpu]->disabled); | ||
2933 | if (max_tr.data[cpu]) | ||
2934 | atomic_dec(&max_tr.data[cpu]->disabled); | ||
2935 | } | ||
2936 | |||
2937 | tracing_start(); | ||
2938 | mutex_unlock(&trace_types_lock); | ||
2939 | |||
2940 | return ret; | ||
2941 | } | ||
2942 | |||
2822 | 2943 | ||
2823 | /** | 2944 | /** |
2824 | * tracing_update_buffers - used by tracing facility to expand ring buffers | 2945 | * tracing_update_buffers - used by tracing facility to expand ring buffers |
@@ -2836,7 +2957,7 @@ int tracing_update_buffers(void) | |||
2836 | 2957 | ||
2837 | mutex_lock(&trace_types_lock); | 2958 | mutex_lock(&trace_types_lock); |
2838 | if (!ring_buffer_expanded) | 2959 | if (!ring_buffer_expanded) |
2839 | ret = tracing_resize_ring_buffer(trace_buf_size); | 2960 | ret = __tracing_resize_ring_buffer(trace_buf_size); |
2840 | mutex_unlock(&trace_types_lock); | 2961 | mutex_unlock(&trace_types_lock); |
2841 | 2962 | ||
2842 | return ret; | 2963 | return ret; |
@@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf) | |||
2860 | mutex_lock(&trace_types_lock); | 2981 | mutex_lock(&trace_types_lock); |
2861 | 2982 | ||
2862 | if (!ring_buffer_expanded) { | 2983 | if (!ring_buffer_expanded) { |
2863 | ret = tracing_resize_ring_buffer(trace_buf_size); | 2984 | ret = __tracing_resize_ring_buffer(trace_buf_size); |
2864 | if (ret < 0) | 2985 | if (ret < 0) |
2865 | goto out; | 2986 | goto out; |
2866 | ret = 0; | 2987 | ret = 0; |
@@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, | |||
2966 | size_t cnt, loff_t *ppos) | 3087 | size_t cnt, loff_t *ppos) |
2967 | { | 3088 | { |
2968 | unsigned long *ptr = filp->private_data; | 3089 | unsigned long *ptr = filp->private_data; |
2969 | char buf[64]; | ||
2970 | unsigned long val; | 3090 | unsigned long val; |
2971 | int ret; | 3091 | int ret; |
2972 | 3092 | ||
2973 | if (cnt >= sizeof(buf)) | 3093 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
2974 | return -EINVAL; | 3094 | if (ret) |
2975 | |||
2976 | if (copy_from_user(&buf, ubuf, cnt)) | ||
2977 | return -EFAULT; | ||
2978 | |||
2979 | buf[cnt] = 0; | ||
2980 | |||
2981 | ret = strict_strtoul(buf, 10, &val); | ||
2982 | if (ret < 0) | ||
2983 | return ret; | 3095 | return ret; |
2984 | 3096 | ||
2985 | *ptr = val * 1000; | 3097 | *ptr = val * 1000; |
@@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
3434 | size_t cnt, loff_t *ppos) | 3546 | size_t cnt, loff_t *ppos) |
3435 | { | 3547 | { |
3436 | unsigned long val; | 3548 | unsigned long val; |
3437 | char buf[64]; | 3549 | int ret; |
3438 | int ret, cpu; | ||
3439 | |||
3440 | if (cnt >= sizeof(buf)) | ||
3441 | return -EINVAL; | ||
3442 | |||
3443 | if (copy_from_user(&buf, ubuf, cnt)) | ||
3444 | return -EFAULT; | ||
3445 | |||
3446 | buf[cnt] = 0; | ||
3447 | 3550 | ||
3448 | ret = strict_strtoul(buf, 10, &val); | 3551 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
3449 | if (ret < 0) | 3552 | if (ret) |
3450 | return ret; | 3553 | return ret; |
3451 | 3554 | ||
3452 | /* must have at least 1 entry */ | 3555 | /* must have at least 1 entry */ |
3453 | if (!val) | 3556 | if (!val) |
3454 | return -EINVAL; | 3557 | return -EINVAL; |
3455 | 3558 | ||
3456 | mutex_lock(&trace_types_lock); | ||
3457 | |||
3458 | tracing_stop(); | ||
3459 | |||
3460 | /* disable all cpu buffers */ | ||
3461 | for_each_tracing_cpu(cpu) { | ||
3462 | if (global_trace.data[cpu]) | ||
3463 | atomic_inc(&global_trace.data[cpu]->disabled); | ||
3464 | if (max_tr.data[cpu]) | ||
3465 | atomic_inc(&max_tr.data[cpu]->disabled); | ||
3466 | } | ||
3467 | |||
3468 | /* value is in KB */ | 3559 | /* value is in KB */ |
3469 | val <<= 10; | 3560 | val <<= 10; |
3470 | 3561 | ||
3471 | if (val != global_trace.entries) { | 3562 | ret = tracing_resize_ring_buffer(val); |
3472 | ret = tracing_resize_ring_buffer(val); | 3563 | if (ret < 0) |
3473 | if (ret < 0) { | 3564 | return ret; |
3474 | cnt = ret; | ||
3475 | goto out; | ||
3476 | } | ||
3477 | } | ||
3478 | 3565 | ||
3479 | *ppos += cnt; | 3566 | *ppos += cnt; |
3480 | 3567 | ||
3481 | /* If check pages failed, return ENOMEM */ | 3568 | return cnt; |
3482 | if (tracing_disabled) | 3569 | } |
3483 | cnt = -ENOMEM; | ||
3484 | out: | ||
3485 | for_each_tracing_cpu(cpu) { | ||
3486 | if (global_trace.data[cpu]) | ||
3487 | atomic_dec(&global_trace.data[cpu]->disabled); | ||
3488 | if (max_tr.data[cpu]) | ||
3489 | atomic_dec(&max_tr.data[cpu]->disabled); | ||
3490 | } | ||
3491 | 3570 | ||
3492 | tracing_start(); | 3571 | static ssize_t |
3493 | mutex_unlock(&trace_types_lock); | 3572 | tracing_free_buffer_write(struct file *filp, const char __user *ubuf, |
3573 | size_t cnt, loff_t *ppos) | ||
3574 | { | ||
3575 | /* | ||
3576 | * There is no need to read what the user has written, this function | ||
3577 | * is just to make sure that there is no error when "echo" is used | ||
3578 | */ | ||
3579 | |||
3580 | *ppos += cnt; | ||
3494 | 3581 | ||
3495 | return cnt; | 3582 | return cnt; |
3496 | } | 3583 | } |
3497 | 3584 | ||
3585 | static int | ||
3586 | tracing_free_buffer_release(struct inode *inode, struct file *filp) | ||
3587 | { | ||
3588 | /* disable tracing ? */ | ||
3589 | if (trace_flags & TRACE_ITER_STOP_ON_FREE) | ||
3590 | tracing_off(); | ||
3591 | /* resize the ring buffer to 0 */ | ||
3592 | tracing_resize_ring_buffer(0); | ||
3593 | |||
3594 | return 0; | ||
3595 | } | ||
3596 | |||
3498 | static int mark_printk(const char *fmt, ...) | 3597 | static int mark_printk(const char *fmt, ...) |
3499 | { | 3598 | { |
3500 | int ret; | 3599 | int ret; |
@@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = { | |||
3640 | .llseek = generic_file_llseek, | 3739 | .llseek = generic_file_llseek, |
3641 | }; | 3740 | }; |
3642 | 3741 | ||
3742 | static const struct file_operations tracing_free_buffer_fops = { | ||
3743 | .write = tracing_free_buffer_write, | ||
3744 | .release = tracing_free_buffer_release, | ||
3745 | }; | ||
3746 | |||
3643 | static const struct file_operations tracing_mark_fops = { | 3747 | static const struct file_operations tracing_mark_fops = { |
3644 | .open = tracing_open_generic, | 3748 | .open = tracing_open_generic, |
3645 | .write = tracing_mark_write, | 3749 | .write = tracing_mark_write, |
@@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3696 | return 0; | 3800 | return 0; |
3697 | 3801 | ||
3698 | if (!info->spare) | 3802 | if (!info->spare) |
3699 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | 3803 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); |
3700 | if (!info->spare) | 3804 | if (!info->spare) |
3701 | return -ENOMEM; | 3805 | return -ENOMEM; |
3702 | 3806 | ||
@@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3853 | 3957 | ||
3854 | ref->ref = 1; | 3958 | ref->ref = 1; |
3855 | ref->buffer = info->tr->buffer; | 3959 | ref->buffer = info->tr->buffer; |
3856 | ref->page = ring_buffer_alloc_read_page(ref->buffer); | 3960 | ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); |
3857 | if (!ref->page) { | 3961 | if (!ref->page) { |
3858 | kfree(ref); | 3962 | kfree(ref); |
3859 | break; | 3963 | break; |
@@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3862 | r = ring_buffer_read_page(ref->buffer, &ref->page, | 3966 | r = ring_buffer_read_page(ref->buffer, &ref->page, |
3863 | len, info->cpu, 1); | 3967 | len, info->cpu, 1); |
3864 | if (r < 0) { | 3968 | if (r < 0) { |
3865 | ring_buffer_free_read_page(ref->buffer, | 3969 | ring_buffer_free_read_page(ref->buffer, ref->page); |
3866 | ref->page); | ||
3867 | kfree(ref); | 3970 | kfree(ref); |
3868 | break; | 3971 | break; |
3869 | } | 3972 | } |
@@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
4099 | { | 4202 | { |
4100 | struct trace_option_dentry *topt = filp->private_data; | 4203 | struct trace_option_dentry *topt = filp->private_data; |
4101 | unsigned long val; | 4204 | unsigned long val; |
4102 | char buf[64]; | ||
4103 | int ret; | 4205 | int ret; |
4104 | 4206 | ||
4105 | if (cnt >= sizeof(buf)) | 4207 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
4106 | return -EINVAL; | 4208 | if (ret) |
4107 | |||
4108 | if (copy_from_user(&buf, ubuf, cnt)) | ||
4109 | return -EFAULT; | ||
4110 | |||
4111 | buf[cnt] = 0; | ||
4112 | |||
4113 | ret = strict_strtoul(buf, 10, &val); | ||
4114 | if (ret < 0) | ||
4115 | return ret; | 4209 | return ret; |
4116 | 4210 | ||
4117 | if (val != 0 && val != 1) | 4211 | if (val != 0 && val != 1) |
@@ -4159,20 +4253,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
4159 | loff_t *ppos) | 4253 | loff_t *ppos) |
4160 | { | 4254 | { |
4161 | long index = (long)filp->private_data; | 4255 | long index = (long)filp->private_data; |
4162 | char buf[64]; | ||
4163 | unsigned long val; | 4256 | unsigned long val; |
4164 | int ret; | 4257 | int ret; |
4165 | 4258 | ||
4166 | if (cnt >= sizeof(buf)) | 4259 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
4167 | return -EINVAL; | 4260 | if (ret) |
4168 | |||
4169 | if (copy_from_user(&buf, ubuf, cnt)) | ||
4170 | return -EFAULT; | ||
4171 | |||
4172 | buf[cnt] = 0; | ||
4173 | |||
4174 | ret = strict_strtoul(buf, 10, &val); | ||
4175 | if (ret < 0) | ||
4176 | return ret; | 4261 | return ret; |
4177 | 4262 | ||
4178 | if (val != 0 && val != 1) | 4263 | if (val != 0 && val != 1) |
@@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void) | |||
4365 | trace_create_file("buffer_size_kb", 0644, d_tracer, | 4450 | trace_create_file("buffer_size_kb", 0644, d_tracer, |
4366 | &global_trace, &tracing_entries_fops); | 4451 | &global_trace, &tracing_entries_fops); |
4367 | 4452 | ||
4453 | trace_create_file("free_buffer", 0644, d_tracer, | ||
4454 | &global_trace, &tracing_free_buffer_fops); | ||
4455 | |||
4368 | trace_create_file("trace_marker", 0220, d_tracer, | 4456 | trace_create_file("trace_marker", 0220, d_tracer, |
4369 | NULL, &tracing_mark_fops); | 4457 | NULL, &tracing_mark_fops); |
4370 | 4458 | ||
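trace_create_file() is the tracing-local wrapper around debugfs file creation used above; once a file_operations exists, wiring up a new control file such as "free_buffer" is a single call from tracer_init_debugfs(). A tiny sketch under the assumption that it lives in kernel/trace/ next to trace.h, reusing the hypothetical example_knob_write() handler sketched after the ftrace_profile_write() hunk earlier (the "example_knob" file is invented for illustration):

#include <linux/fs.h>
#include <linux/init.h>
#include "trace.h"

/* Assumes example_knob_write() from the earlier sketch is defined above. */
static const struct file_operations example_knob_fops = {
        .open   = tracing_open_generic,
        .write  = example_knob_write,
        .llseek = generic_file_llseek,
};

static __init int example_init_debugfs(void)
{
        struct dentry *d_tracer = tracing_init_dentry();

        if (!d_tracer)
                return 0;

        trace_create_file("example_knob", 0644, d_tracer,
                          NULL, &example_knob_fops);
        return 0;
}
fs_initcall(example_init_debugfs);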
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 229f8591f61d..3f381d0b20a8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -278,6 +278,29 @@ struct tracer { | |||
278 | }; | 278 | }; |
279 | 279 | ||
280 | 280 | ||
281 | /* Only current can touch trace_recursion */ | ||
282 | #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||
283 | #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||
284 | |||
285 | /* Ring buffer has the 10 LSB bits to count */ | ||
286 | #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | ||
287 | |||
288 | /* for function tracing recursion */ | ||
289 | #define TRACE_INTERNAL_BIT (1<<11) | ||
290 | #define TRACE_GLOBAL_BIT (1<<12) | ||
291 | /* | ||
292 | * Abuse of the trace_recursion. | ||
293 | * As we need a way to maintain state if we are tracing the function | ||
294 | * graph in irq because we want to trace a particular function that | ||
295 | * was called in irq context but we have irq tracing off. Since this | ||
296 | * can only be modified by current, we can reuse trace_recursion. | ||
297 | */ | ||
298 | #define TRACE_IRQ_BIT (1<<13) | ||
299 | |||
300 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) | ||
301 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) | ||
302 | #define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) | ||
303 | |||
281 | #define TRACE_PIPE_ALL_CPU -1 | 304 | #define TRACE_PIPE_ALL_CPU -1 |
282 | 305 | ||
283 | int tracer_init(struct tracer *t, struct trace_array *tr); | 306 | int tracer_init(struct tracer *t, struct trace_array *tr); |
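The relocated trace_recursion helpers give per-task flag bits that only current may touch, which makes them a cheap re-entrancy guard: test the bit on entry, set it for the duration of the work, clear it on the way out. A minimal usage sketch, assuming the macros above are in scope; EXAMPLE_BIT and example_callback() are invented for illustration and deliberately avoid the bits already claimed:

/* Assumes the trace_recursion_*() macros and bit layout defined above. */
#define EXAMPLE_BIT     (1<<14)

static void example_callback(unsigned long ip)
{
        if (trace_recursion_test(EXAMPLE_BIT))
                return;                 /* already inside: avoid recursing */

        trace_recursion_set(EXAMPLE_BIT);

        /* ... work that may itself end up back in this callback ... */

        trace_recursion_clear(EXAMPLE_BIT);
}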
@@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr, | |||
389 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | 412 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
390 | int skip, int pc); | 413 | int skip, int pc); |
391 | 414 | ||
415 | void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, | ||
416 | int skip, int pc, struct pt_regs *regs); | ||
417 | |||
392 | void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, | 418 | void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, |
393 | int pc); | 419 | int pc); |
394 | 420 | ||
@@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer, | |||
400 | { | 426 | { |
401 | } | 427 | } |
402 | 428 | ||
429 | static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer, | ||
430 | unsigned long flags, int skip, | ||
431 | int pc, struct pt_regs *regs) | ||
432 | { | ||
433 | } | ||
434 | |||
403 | static inline void ftrace_trace_userstack(struct ring_buffer *buffer, | 435 | static inline void ftrace_trace_userstack(struct ring_buffer *buffer, |
404 | unsigned long flags, int pc) | 436 | unsigned long flags, int pc) |
405 | { | 437 | { |
@@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr) | |||
507 | return 1; | 539 | return 1; |
508 | 540 | ||
509 | for (i = 0; i < ftrace_graph_count; i++) { | 541 | for (i = 0; i < ftrace_graph_count; i++) { |
510 | if (addr == ftrace_graph_funcs[i]) | 542 | if (addr == ftrace_graph_funcs[i]) { |
543 | /* | ||
544 | * If no irqs are to be traced, but a set_graph_function | ||
545 | * is set, and called by an interrupt handler, we still | ||
546 | * want to trace it. | ||
547 | */ | ||
548 | if (in_irq()) | ||
549 | trace_recursion_set(TRACE_IRQ_BIT); | ||
550 | else | ||
551 | trace_recursion_clear(TRACE_IRQ_BIT); | ||
511 | return 1; | 552 | return 1; |
553 | } | ||
512 | } | 554 | } |
513 | 555 | ||
514 | return 0; | 556 | return 0; |
@@ -609,6 +651,7 @@ enum trace_iterator_flags { | |||
609 | TRACE_ITER_GRAPH_TIME = 0x80000, | 651 | TRACE_ITER_GRAPH_TIME = 0x80000, |
610 | TRACE_ITER_RECORD_CMD = 0x100000, | 652 | TRACE_ITER_RECORD_CMD = 0x100000, |
611 | TRACE_ITER_OVERWRITE = 0x200000, | 653 | TRACE_ITER_OVERWRITE = 0x200000, |
654 | TRACE_ITER_STOP_ON_FREE = 0x400000, | ||
612 | }; | 655 | }; |
613 | 656 | ||
614 | /* | 657 | /* |
@@ -677,6 +720,7 @@ struct event_subsystem { | |||
677 | struct dentry *entry; | 720 | struct dentry *entry; |
678 | struct event_filter *filter; | 721 | struct event_filter *filter; |
679 | int nr_events; | 722 | int nr_events; |
723 | int ref_count; | ||
680 | }; | 724 | }; |
681 | 725 | ||
682 | #define FILTER_PRED_INVALID ((unsigned short)-1) | 726 | #define FILTER_PRED_INVALID ((unsigned short)-1) |
@@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[]; | |||
784 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 828 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) |
785 | #include "trace_entries.h" | 829 | #include "trace_entries.h" |
786 | 830 | ||
787 | /* Only current can touch trace_recursion */ | ||
788 | #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||
789 | #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||
790 | |||
791 | /* Ring buffer has the 10 LSB bits to count */ | ||
792 | #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | ||
793 | |||
794 | /* for function tracing recursion */ | ||
795 | #define TRACE_INTERNAL_BIT (1<<11) | ||
796 | #define TRACE_GLOBAL_BIT (1<<12) | ||
797 | |||
798 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) | ||
799 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) | ||
800 | #define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) | ||
801 | |||
802 | #endif /* _LINUX_KERNEL_TRACE_H */ | 831 | #endif /* _LINUX_KERNEL_TRACE_H */ |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e32744c84d94..93365907f219 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry, | |||
161 | TRACE_STACK, | 161 | TRACE_STACK, |
162 | 162 | ||
163 | F_STRUCT( | 163 | F_STRUCT( |
164 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | 164 | __field( int, size ) |
165 | __dynamic_array(unsigned long, caller ) | ||
165 | ), | 166 | ), |
166 | 167 | ||
167 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | 168 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 686ec399f2a8..581876f9f387 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -244,6 +244,35 @@ static void ftrace_clear_events(void) | |||
244 | mutex_unlock(&event_mutex); | 244 | mutex_unlock(&event_mutex); |
245 | } | 245 | } |
246 | 246 | ||
247 | static void __put_system(struct event_subsystem *system) | ||
248 | { | ||
249 | struct event_filter *filter = system->filter; | ||
250 | |||
251 | WARN_ON_ONCE(system->ref_count == 0); | ||
252 | if (--system->ref_count) | ||
253 | return; | ||
254 | |||
255 | if (filter) { | ||
256 | kfree(filter->filter_string); | ||
257 | kfree(filter); | ||
258 | } | ||
259 | kfree(system->name); | ||
260 | kfree(system); | ||
261 | } | ||
262 | |||
263 | static void __get_system(struct event_subsystem *system) | ||
264 | { | ||
265 | WARN_ON_ONCE(system->ref_count == 0); | ||
266 | system->ref_count++; | ||
267 | } | ||
268 | |||
269 | static void put_system(struct event_subsystem *system) | ||
270 | { | ||
271 | mutex_lock(&event_mutex); | ||
272 | __put_system(system); | ||
273 | mutex_unlock(&event_mutex); | ||
274 | } | ||
275 | |||
247 | /* | 276 | /* |
248 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. | 277 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. |
249 | */ | 278 | */ |
@@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
486 | loff_t *ppos) | 515 | loff_t *ppos) |
487 | { | 516 | { |
488 | struct ftrace_event_call *call = filp->private_data; | 517 | struct ftrace_event_call *call = filp->private_data; |
489 | char buf[64]; | ||
490 | unsigned long val; | 518 | unsigned long val; |
491 | int ret; | 519 | int ret; |
492 | 520 | ||
493 | if (cnt >= sizeof(buf)) | 521 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
494 | return -EINVAL; | 522 | if (ret) |
495 | |||
496 | if (copy_from_user(&buf, ubuf, cnt)) | ||
497 | return -EFAULT; | ||
498 | |||
499 | buf[cnt] = 0; | ||
500 | |||
501 | ret = strict_strtoul(buf, 10, &val); | ||
502 | if (ret < 0) | ||
503 | return ret; | 523 | return ret; |
504 | 524 | ||
505 | ret = tracing_update_buffers(); | 525 | ret = tracing_update_buffers(); |
@@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
528 | loff_t *ppos) | 548 | loff_t *ppos) |
529 | { | 549 | { |
530 | const char set_to_char[4] = { '?', '0', '1', 'X' }; | 550 | const char set_to_char[4] = { '?', '0', '1', 'X' }; |
531 | const char *system = filp->private_data; | 551 | struct event_subsystem *system = filp->private_data; |
532 | struct ftrace_event_call *call; | 552 | struct ftrace_event_call *call; |
533 | char buf[2]; | 553 | char buf[2]; |
534 | int set = 0; | 554 | int set = 0; |
@@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
539 | if (!call->name || !call->class || !call->class->reg) | 559 | if (!call->name || !call->class || !call->class->reg) |
540 | continue; | 560 | continue; |
541 | 561 | ||
542 | if (system && strcmp(call->class->system, system) != 0) | 562 | if (system && strcmp(call->class->system, system->name) != 0) |
543 | continue; | 563 | continue; |
544 | 564 | ||
545 | /* | 565 | /* |
@@ -569,21 +589,13 @@ static ssize_t | |||
569 | system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | 589 | system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, |
570 | loff_t *ppos) | 590 | loff_t *ppos) |
571 | { | 591 | { |
572 | const char *system = filp->private_data; | 592 | struct event_subsystem *system = filp->private_data; |
593 | const char *name = NULL; | ||
573 | unsigned long val; | 594 | unsigned long val; |
574 | char buf[64]; | ||
575 | ssize_t ret; | 595 | ssize_t ret; |
576 | 596 | ||
577 | if (cnt >= sizeof(buf)) | 597 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
578 | return -EINVAL; | 598 | if (ret) |
579 | |||
580 | if (copy_from_user(&buf, ubuf, cnt)) | ||
581 | return -EFAULT; | ||
582 | |||
583 | buf[cnt] = 0; | ||
584 | |||
585 | ret = strict_strtoul(buf, 10, &val); | ||
586 | if (ret < 0) | ||
587 | return ret; | 599 | return ret; |
588 | 600 | ||
589 | ret = tracing_update_buffers(); | 601 | ret = tracing_update_buffers(); |
@@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
593 | if (val != 0 && val != 1) | 605 | if (val != 0 && val != 1) |
594 | return -EINVAL; | 606 | return -EINVAL; |
595 | 607 | ||
596 | ret = __ftrace_set_clr_event(NULL, system, NULL, val); | 608 | /* |
609 | * Opening of "enable" adds a ref count to system, | ||
610 | * so the name is safe to use. | ||
611 | */ | ||
612 | if (system) | ||
613 | name = system->name; | ||
614 | |||
615 | ret = __ftrace_set_clr_event(NULL, name, NULL, val); | ||
597 | if (ret) | 616 | if (ret) |
598 | goto out; | 617 | goto out; |
599 | 618 | ||
@@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
826 | return cnt; | 845 | return cnt; |
827 | } | 846 | } |
828 | 847 | ||
848 | static LIST_HEAD(event_subsystems); | ||
849 | |||
850 | static int subsystem_open(struct inode *inode, struct file *filp) | ||
851 | { | ||
852 | struct event_subsystem *system = NULL; | ||
853 | int ret; | ||
854 | |||
855 | if (!inode->i_private) | ||
856 | goto skip_search; | ||
857 | |||
858 | /* Make sure the system still exists */ | ||
859 | mutex_lock(&event_mutex); | ||
860 | list_for_each_entry(system, &event_subsystems, list) { | ||
861 | if (system == inode->i_private) { | ||
862 | /* Don't open systems with no events */ | ||
863 | if (!system->nr_events) { | ||
864 | system = NULL; | ||
865 | break; | ||
866 | } | ||
867 | __get_system(system); | ||
868 | break; | ||
869 | } | ||
870 | } | ||
871 | mutex_unlock(&event_mutex); | ||
872 | |||
873 | if (system != inode->i_private) | ||
874 | return -ENODEV; | ||
875 | |||
876 | skip_search: | ||
877 | ret = tracing_open_generic(inode, filp); | ||
878 | if (ret < 0 && system) | ||
879 | put_system(system); | ||
880 | |||
881 | return ret; | ||
882 | } | ||
883 | |||
884 | static int subsystem_release(struct inode *inode, struct file *file) | ||
885 | { | ||
886 | struct event_subsystem *system = inode->i_private; | ||
887 | |||
888 | if (system) | ||
889 | put_system(system); | ||
890 | |||
891 | return 0; | ||
892 | } | ||
893 | |||
829 | static ssize_t | 894 | static ssize_t |
830 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | 895 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, |
831 | loff_t *ppos) | 896 | loff_t *ppos) |
@@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = { | |||
963 | }; | 1028 | }; |
964 | 1029 | ||
965 | static const struct file_operations ftrace_subsystem_filter_fops = { | 1030 | static const struct file_operations ftrace_subsystem_filter_fops = { |
966 | .open = tracing_open_generic, | 1031 | .open = subsystem_open, |
967 | .read = subsystem_filter_read, | 1032 | .read = subsystem_filter_read, |
968 | .write = subsystem_filter_write, | 1033 | .write = subsystem_filter_write, |
969 | .llseek = default_llseek, | 1034 | .llseek = default_llseek, |
1035 | .release = subsystem_release, | ||
970 | }; | 1036 | }; |
971 | 1037 | ||
972 | static const struct file_operations ftrace_system_enable_fops = { | 1038 | static const struct file_operations ftrace_system_enable_fops = { |
973 | .open = tracing_open_generic, | 1039 | .open = subsystem_open, |
974 | .read = system_enable_read, | 1040 | .read = system_enable_read, |
975 | .write = system_enable_write, | 1041 | .write = system_enable_write, |
976 | .llseek = default_llseek, | 1042 | .llseek = default_llseek, |
1043 | .release = subsystem_release, | ||
977 | }; | 1044 | }; |
978 | 1045 | ||
979 | static const struct file_operations ftrace_show_header_fops = { | 1046 | static const struct file_operations ftrace_show_header_fops = { |
@@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void) | |||
1002 | return d_events; | 1069 | return d_events; |
1003 | } | 1070 | } |
1004 | 1071 | ||
1005 | static LIST_HEAD(event_subsystems); | ||
1006 | |||
1007 | static struct dentry * | 1072 | static struct dentry * |
1008 | event_subsystem_dir(const char *name, struct dentry *d_events) | 1073 | event_subsystem_dir(const char *name, struct dentry *d_events) |
1009 | { | 1074 | { |
@@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
1013 | /* First see if we did not already create this dir */ | 1078 | /* First see if we did not already create this dir */ |
1014 | list_for_each_entry(system, &event_subsystems, list) { | 1079 | list_for_each_entry(system, &event_subsystems, list) { |
1015 | if (strcmp(system->name, name) == 0) { | 1080 | if (strcmp(system->name, name) == 0) { |
1081 | __get_system(system); | ||
1016 | system->nr_events++; | 1082 | system->nr_events++; |
1017 | return system->entry; | 1083 | return system->entry; |
1018 | } | 1084 | } |
@@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
1035 | } | 1101 | } |
1036 | 1102 | ||
1037 | system->nr_events = 1; | 1103 | system->nr_events = 1; |
1104 | system->ref_count = 1; | ||
1038 | system->name = kstrdup(name, GFP_KERNEL); | 1105 | system->name = kstrdup(name, GFP_KERNEL); |
1039 | if (!system->name) { | 1106 | if (!system->name) { |
1040 | debugfs_remove(system->entry); | 1107 | debugfs_remove(system->entry); |
@@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
1062 | "'%s/filter' entry\n", name); | 1129 | "'%s/filter' entry\n", name); |
1063 | } | 1130 | } |
1064 | 1131 | ||
1065 | trace_create_file("enable", 0644, system->entry, | 1132 | trace_create_file("enable", 0644, system->entry, system, |
1066 | (void *)system->name, | ||
1067 | &ftrace_system_enable_fops); | 1133 | &ftrace_system_enable_fops); |
1068 | 1134 | ||
1069 | return system->entry; | 1135 | return system->entry; |
@@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name) | |||
1184 | list_for_each_entry(system, &event_subsystems, list) { | 1250 | list_for_each_entry(system, &event_subsystems, list) { |
1185 | if (strcmp(system->name, name) == 0) { | 1251 | if (strcmp(system->name, name) == 0) { |
1186 | if (!--system->nr_events) { | 1252 | if (!--system->nr_events) { |
1187 | struct event_filter *filter = system->filter; | ||
1188 | |||
1189 | debugfs_remove_recursive(system->entry); | 1253 | debugfs_remove_recursive(system->entry); |
1190 | list_del(&system->list); | 1254 | list_del(&system->list); |
1191 | if (filter) { | 1255 | __put_system(system); |
1192 | kfree(filter->filter_string); | ||
1193 | kfree(filter); | ||
1194 | } | ||
1195 | kfree(system->name); | ||
1196 | kfree(system); | ||
1197 | } | 1256 | } |
1198 | break; | 1257 | break; |
1199 | } | 1258 | } |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8008ddcfbf20..256764ecccd6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1886 | 1886 | ||
1887 | mutex_lock(&event_mutex); | 1887 | mutex_lock(&event_mutex); |
1888 | 1888 | ||
1889 | /* Make sure the system still has events */ | ||
1890 | if (!system->nr_events) { | ||
1891 | err = -ENODEV; | ||
1892 | goto out_unlock; | ||
1893 | } | ||
1894 | |||
1889 | if (!strcmp(strstrip(filter_string), "0")) { | 1895 | if (!strcmp(strstrip(filter_string), "0")) { |
1890 | filter_free_subsystem_preds(system); | 1896 | filter_free_subsystem_preds(system); |
1891 | remove_filter_string(system->filter); | 1897 | remove_filter_string(system->filter); |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8d0e1cc4e974..c7b0c6a7db09 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param) | |||
324 | } | 324 | } |
325 | 325 | ||
326 | static int | 326 | static int |
327 | ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) | 327 | ftrace_trace_onoff_callback(struct ftrace_hash *hash, |
328 | char *glob, char *cmd, char *param, int enable) | ||
328 | { | 329 | { |
329 | struct ftrace_probe_ops *ops; | 330 | struct ftrace_probe_ops *ops; |
330 | void *count = (void *)-1; | 331 | void *count = (void *)-1; |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 962cdb24ed81..a7d2a4c653d8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = { | |||
74 | 74 | ||
75 | static struct trace_array *graph_array; | 75 | static struct trace_array *graph_array; |
76 | 76 | ||
77 | /* | ||
78 | * The DURATION column is also used to display IRQ signs; the | ||
79 | * following values are used by print_graph_irq and others | ||
80 | * to fill in space in the DURATION column. | ||
81 | */ | ||
82 | enum { | ||
83 | DURATION_FILL_FULL = -1, | ||
84 | DURATION_FILL_START = -2, | ||
85 | DURATION_FILL_END = -3, | ||
86 | }; | ||
87 | |||
88 | static enum print_line_t | ||
89 | print_graph_duration(unsigned long long duration, struct trace_seq *s, | ||
90 | u32 flags); | ||
77 | 91 | ||
78 | /* Add a function return address to the trace stack on thread info.*/ | 92 | /* Add a function return address to the trace stack on thread info.*/ |
79 | int | 93 | int |
@@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr, | |||
213 | 227 | ||
214 | static inline int ftrace_graph_ignore_irqs(void) | 228 | static inline int ftrace_graph_ignore_irqs(void) |
215 | { | 229 | { |
216 | if (!ftrace_graph_skip_irqs) | 230 | if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) |
217 | return 0; | 231 | return 0; |
218 | 232 | ||
219 | return in_irq(); | 233 | return in_irq(); |
@@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
577 | return next; | 591 | return next; |
578 | } | 592 | } |
579 | 593 | ||
580 | /* Signal a overhead of time execution to the output */ | ||
581 | static int | ||
582 | print_graph_overhead(unsigned long long duration, struct trace_seq *s, | ||
583 | u32 flags) | ||
584 | { | ||
585 | /* If duration disappear, we don't need anything */ | ||
586 | if (!(flags & TRACE_GRAPH_PRINT_DURATION)) | ||
587 | return 1; | ||
588 | |||
589 | /* Non nested entry or return */ | ||
590 | if (duration == -1) | ||
591 | return trace_seq_printf(s, " "); | ||
592 | |||
593 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | ||
594 | /* Duration exceeded 100 msecs */ | ||
595 | if (duration > 100000ULL) | ||
596 | return trace_seq_printf(s, "! "); | ||
597 | |||
598 | /* Duration exceeded 10 msecs */ | ||
599 | if (duration > 10000ULL) | ||
600 | return trace_seq_printf(s, "+ "); | ||
601 | } | ||
602 | |||
603 | return trace_seq_printf(s, " "); | ||
604 | } | ||
605 | |||
606 | static int print_graph_abs_time(u64 t, struct trace_seq *s) | 594 | static int print_graph_abs_time(u64 t, struct trace_seq *s) |
607 | { | 595 | { |
608 | unsigned long usecs_rem; | 596 | unsigned long usecs_rem; |
@@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
625 | addr >= (unsigned long)__irqentry_text_end) | 613 | addr >= (unsigned long)__irqentry_text_end) |
626 | return TRACE_TYPE_UNHANDLED; | 614 | return TRACE_TYPE_UNHANDLED; |
627 | 615 | ||
628 | /* Absolute time */ | 616 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
629 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { | 617 | /* Absolute time */ |
630 | ret = print_graph_abs_time(iter->ts, s); | 618 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { |
631 | if (!ret) | 619 | ret = print_graph_abs_time(iter->ts, s); |
632 | return TRACE_TYPE_PARTIAL_LINE; | 620 | if (!ret) |
633 | } | 621 | return TRACE_TYPE_PARTIAL_LINE; |
622 | } | ||
634 | 623 | ||
635 | /* Cpu */ | 624 | /* Cpu */ |
636 | if (flags & TRACE_GRAPH_PRINT_CPU) { | 625 | if (flags & TRACE_GRAPH_PRINT_CPU) { |
637 | ret = print_graph_cpu(s, cpu); | 626 | ret = print_graph_cpu(s, cpu); |
638 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 627 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
639 | return TRACE_TYPE_PARTIAL_LINE; | 628 | return TRACE_TYPE_PARTIAL_LINE; |
640 | } | 629 | } |
641 | 630 | ||
642 | /* Proc */ | 631 | /* Proc */ |
643 | if (flags & TRACE_GRAPH_PRINT_PROC) { | 632 | if (flags & TRACE_GRAPH_PRINT_PROC) { |
644 | ret = print_graph_proc(s, pid); | 633 | ret = print_graph_proc(s, pid); |
645 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 634 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
646 | return TRACE_TYPE_PARTIAL_LINE; | 635 | return TRACE_TYPE_PARTIAL_LINE; |
647 | ret = trace_seq_printf(s, " | "); | 636 | ret = trace_seq_printf(s, " | "); |
648 | if (!ret) | 637 | if (!ret) |
649 | return TRACE_TYPE_PARTIAL_LINE; | 638 | return TRACE_TYPE_PARTIAL_LINE; |
639 | } | ||
650 | } | 640 | } |
651 | 641 | ||
652 | /* No overhead */ | 642 | /* No overhead */ |
653 | ret = print_graph_overhead(-1, s, flags); | 643 | ret = print_graph_duration(DURATION_FILL_START, s, flags); |
654 | if (!ret) | 644 | if (ret != TRACE_TYPE_HANDLED) |
655 | return TRACE_TYPE_PARTIAL_LINE; | 645 | return ret; |
656 | 646 | ||
657 | if (type == TRACE_GRAPH_ENT) | 647 | if (type == TRACE_GRAPH_ENT) |
658 | ret = trace_seq_printf(s, "==========>"); | 648 | ret = trace_seq_printf(s, "==========>"); |
@@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
662 | if (!ret) | 652 | if (!ret) |
663 | return TRACE_TYPE_PARTIAL_LINE; | 653 | return TRACE_TYPE_PARTIAL_LINE; |
664 | 654 | ||
665 | /* Don't close the duration column if haven't one */ | 655 | ret = print_graph_duration(DURATION_FILL_END, s, flags); |
666 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 656 | if (ret != TRACE_TYPE_HANDLED) |
667 | trace_seq_printf(s, " |"); | 657 | return ret; |
658 | |||
668 | ret = trace_seq_printf(s, "\n"); | 659 | ret = trace_seq_printf(s, "\n"); |
669 | 660 | ||
670 | if (!ret) | 661 | if (!ret) |
@@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
716 | } | 707 | } |
717 | 708 | ||
718 | static enum print_line_t | 709 | static enum print_line_t |
719 | print_graph_duration(unsigned long long duration, struct trace_seq *s) | 710 | print_graph_duration(unsigned long long duration, struct trace_seq *s, |
711 | u32 flags) | ||
720 | { | 712 | { |
721 | int ret; | 713 | int ret = -1; |
714 | |||
715 | if (!(flags & TRACE_GRAPH_PRINT_DURATION) || | ||
716 | !(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
717 | return TRACE_TYPE_HANDLED; | ||
718 | |||
719 | /* No real data, just filling the column with spaces */ | ||
720 | switch (duration) { | ||
721 | case DURATION_FILL_FULL: | ||
722 | ret = trace_seq_printf(s, " | "); | ||
723 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
724 | case DURATION_FILL_START: | ||
725 | ret = trace_seq_printf(s, " "); | ||
726 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
727 | case DURATION_FILL_END: | ||
728 | ret = trace_seq_printf(s, " |"); | ||
729 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
730 | } | ||
731 | |||
732 | /* Signal an execution-time overhead in the output */ | ||
733 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | ||
734 | /* Duration exceeded 100 msecs */ | ||
735 | if (duration > 100000ULL) | ||
736 | ret = trace_seq_printf(s, "! "); | ||
737 | /* Duration exceeded 10 msecs */ | ||
738 | else if (duration > 10000ULL) | ||
739 | ret = trace_seq_printf(s, "+ "); | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * The -1 means we either did not exceed the duration thresholds | ||
744 | * or we don't want to print out the overhead. Either way we need | ||
745 | * to fill out the space. | ||
746 | */ | ||
747 | if (ret == -1) | ||
748 | ret = trace_seq_printf(s, " "); | ||
749 | |||
751 | /* Catch here any failure that happened above */ | ||
751 | if (!ret) | ||
752 | return TRACE_TYPE_PARTIAL_LINE; | ||
722 | 753 | ||
723 | ret = trace_print_graph_duration(duration, s); | 754 | ret = trace_print_graph_duration(duration, s); |
724 | if (ret != TRACE_TYPE_HANDLED) | 755 | if (ret != TRACE_TYPE_HANDLED) |
@@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
767 | cpu_data->enter_funcs[call->depth] = 0; | 798 | cpu_data->enter_funcs[call->depth] = 0; |
768 | } | 799 | } |
769 | 800 | ||
770 | /* Overhead */ | 801 | /* Overhead and duration */ |
771 | ret = print_graph_overhead(duration, s, flags); | 802 | ret = print_graph_duration(duration, s, flags); |
772 | if (!ret) | 803 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
773 | return TRACE_TYPE_PARTIAL_LINE; | 804 | return TRACE_TYPE_PARTIAL_LINE; |
774 | 805 | ||
775 | /* Duration */ | ||
776 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | ||
777 | ret = print_graph_duration(duration, s); | ||
778 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
779 | return TRACE_TYPE_PARTIAL_LINE; | ||
780 | } | ||
781 | |||
782 | /* Function */ | 806 | /* Function */ |
783 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 807 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
784 | ret = trace_seq_printf(s, " "); | 808 | ret = trace_seq_printf(s, " "); |
@@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
815 | cpu_data->enter_funcs[call->depth] = call->func; | 839 | cpu_data->enter_funcs[call->depth] = call->func; |
816 | } | 840 | } |
817 | 841 | ||
818 | /* No overhead */ | ||
819 | ret = print_graph_overhead(-1, s, flags); | ||
820 | if (!ret) | ||
821 | return TRACE_TYPE_PARTIAL_LINE; | ||
822 | |||
823 | /* No time */ | 842 | /* No time */ |
824 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | 843 | ret = print_graph_duration(DURATION_FILL_FULL, s, flags); |
825 | ret = trace_seq_printf(s, " | "); | 844 | if (ret != TRACE_TYPE_HANDLED) |
826 | if (!ret) | 845 | return ret; |
827 | return TRACE_TYPE_PARTIAL_LINE; | ||
828 | } | ||
829 | 846 | ||
830 | /* Function */ | 847 | /* Function */ |
831 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 848 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
@@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
865 | return TRACE_TYPE_PARTIAL_LINE; | 882 | return TRACE_TYPE_PARTIAL_LINE; |
866 | } | 883 | } |
867 | 884 | ||
885 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
886 | return 0; | ||
887 | |||
868 | /* Absolute time */ | 888 | /* Absolute time */ |
869 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { | 889 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { |
870 | ret = print_graph_abs_time(iter->ts, s); | 890 | ret = print_graph_abs_time(iter->ts, s); |
@@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
1078 | if (print_graph_prologue(iter, s, 0, 0, flags)) | 1098 | if (print_graph_prologue(iter, s, 0, 0, flags)) |
1079 | return TRACE_TYPE_PARTIAL_LINE; | 1099 | return TRACE_TYPE_PARTIAL_LINE; |
1080 | 1100 | ||
1081 | /* Overhead */ | 1101 | /* Overhead and duration */ |
1082 | ret = print_graph_overhead(duration, s, flags); | 1102 | ret = print_graph_duration(duration, s, flags); |
1083 | if (!ret) | 1103 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
1084 | return TRACE_TYPE_PARTIAL_LINE; | 1104 | return TRACE_TYPE_PARTIAL_LINE; |
1085 | 1105 | ||
1086 | /* Duration */ | ||
1087 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | ||
1088 | ret = print_graph_duration(duration, s); | ||
1089 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
1090 | return TRACE_TYPE_PARTIAL_LINE; | ||
1091 | } | ||
1092 | |||
1093 | /* Closing brace */ | 1106 | /* Closing brace */ |
1094 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { | 1107 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { |
1095 | ret = trace_seq_printf(s, " "); | 1108 | ret = trace_seq_printf(s, " "); |
@@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1146 | if (print_graph_prologue(iter, s, 0, 0, flags)) | 1159 | if (print_graph_prologue(iter, s, 0, 0, flags)) |
1147 | return TRACE_TYPE_PARTIAL_LINE; | 1160 | return TRACE_TYPE_PARTIAL_LINE; |
1148 | 1161 | ||
1149 | /* No overhead */ | ||
1150 | ret = print_graph_overhead(-1, s, flags); | ||
1151 | if (!ret) | ||
1152 | return TRACE_TYPE_PARTIAL_LINE; | ||
1153 | |||
1154 | /* No time */ | 1162 | /* No time */ |
1155 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | 1163 | ret = print_graph_duration(DURATION_FILL_FULL, s, flags); |
1156 | ret = trace_seq_printf(s, " | "); | 1164 | if (ret != TRACE_TYPE_HANDLED) |
1157 | if (!ret) | 1165 | return ret; |
1158 | return TRACE_TYPE_PARTIAL_LINE; | ||
1159 | } | ||
1160 | 1166 | ||
1161 | /* Indentation */ | 1167 | /* Indentation */ |
1162 | if (depth > 0) | 1168 | if (depth > 0) |
@@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1207 | 1213 | ||
1208 | 1214 | ||
1209 | enum print_line_t | 1215 | enum print_line_t |
1210 | __print_graph_function_flags(struct trace_iterator *iter, u32 flags) | 1216 | print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
1211 | { | 1217 | { |
1212 | struct ftrace_graph_ent_entry *field; | 1218 | struct ftrace_graph_ent_entry *field; |
1213 | struct fgraph_data *data = iter->private; | 1219 | struct fgraph_data *data = iter->private; |
@@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
1270 | static enum print_line_t | 1276 | static enum print_line_t |
1271 | print_graph_function(struct trace_iterator *iter) | 1277 | print_graph_function(struct trace_iterator *iter) |
1272 | { | 1278 | { |
1273 | return __print_graph_function_flags(iter, tracer_flags.val); | 1279 | return print_graph_function_flags(iter, tracer_flags.val); |
1274 | } | ||
1275 | |||
1276 | enum print_line_t print_graph_function_flags(struct trace_iterator *iter, | ||
1277 | u32 flags) | ||
1278 | { | ||
1279 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
1280 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
1281 | else | ||
1282 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
1283 | |||
1284 | return __print_graph_function_flags(iter, flags); | ||
1285 | } | 1280 | } |
1286 | 1281 | ||
1287 | static enum print_line_t | 1282 | static enum print_line_t |
@@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags) | |||
1309 | seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); | 1304 | seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); |
1310 | seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); | 1305 | seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); |
1311 | seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); | 1306 | seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); |
1312 | seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); | 1307 | seq_printf(s, "#%.*s||| / \n", size, spaces); |
1313 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | ||
1314 | } | 1308 | } |
1315 | 1309 | ||
1316 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | 1310 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) |
@@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
1329 | if (flags & TRACE_GRAPH_PRINT_PROC) | 1323 | if (flags & TRACE_GRAPH_PRINT_PROC) |
1330 | seq_printf(s, " TASK/PID "); | 1324 | seq_printf(s, " TASK/PID "); |
1331 | if (lat) | 1325 | if (lat) |
1332 | seq_printf(s, "|||||"); | 1326 | seq_printf(s, "||||"); |
1333 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 1327 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
1334 | seq_printf(s, " DURATION "); | 1328 | seq_printf(s, " DURATION "); |
1335 | seq_printf(s, " FUNCTION CALLS\n"); | 1329 | seq_printf(s, " FUNCTION CALLS\n"); |
@@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
1343 | if (flags & TRACE_GRAPH_PRINT_PROC) | 1337 | if (flags & TRACE_GRAPH_PRINT_PROC) |
1344 | seq_printf(s, " | | "); | 1338 | seq_printf(s, " | | "); |
1345 | if (lat) | 1339 | if (lat) |
1346 | seq_printf(s, "|||||"); | 1340 | seq_printf(s, "||||"); |
1347 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 1341 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
1348 | seq_printf(s, " | | "); | 1342 | seq_printf(s, " | | "); |
1349 | seq_printf(s, " | | | |\n"); | 1343 | seq_printf(s, " | | | |\n"); |
@@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
1358 | { | 1352 | { |
1359 | struct trace_iterator *iter = s->private; | 1353 | struct trace_iterator *iter = s->private; |
1360 | 1354 | ||
1355 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
1356 | return; | ||
1357 | |||
1361 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | 1358 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { |
1362 | /* print nothing if the buffers are empty */ | 1359 | /* print nothing if the buffers are empty */ |
1363 | if (trace_empty(iter)) | 1360 | if (trace_empty(iter)) |
1364 | return; | 1361 | return; |
1365 | 1362 | ||
1366 | print_trace_header(s, iter); | 1363 | print_trace_header(s, iter); |
1367 | flags |= TRACE_GRAPH_PRINT_DURATION; | 1364 | } |
1368 | } else | ||
1369 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
1370 | 1365 | ||
1371 | __print_graph_headers_flags(s, flags); | 1366 | __print_graph_headers_flags(s, flags); |
1372 | } | 1367 | } |
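Editor's note: in the trace_functions_graph.c hunks above, print_graph_overhead() is folded into print_graph_duration(). Negative DURATION_FILL_* sentinels now ask for column padding, while real durations still get the '!'/'+' overhead markers before the time is printed. A small standalone sketch of that dispatch, assuming durations in microseconds as the 100000/10000 thresholds and their "msecs" comments suggest (the exact column widths are illustrative):

```c
/* Sketch of the consolidated DURATION column: one function handles both
 * the "fill with spaces" sentinels and the overhead markers. Plain C,
 * not the trace_seq API. */
#include <stdio.h>

enum {
	FILL_FULL  = -1,	/* whole column is blank */
	FILL_START = -2,	/* opening half of the column */
	FILL_END   = -3,	/* closing half of the column */
};

static void print_duration(long long duration)	/* microseconds */
{
	switch (duration) {
	case FILL_FULL:  printf("            |  "); return;
	case FILL_START: printf("  ");              return;
	case FILL_END:   printf(" |");              return;
	}

	/* Overhead marker: '!' above 100 ms, '+' above 10 ms, blank otherwise */
	if (duration > 100000LL)
		printf("! ");
	else if (duration > 10000LL)
		printf("+ ");
	else
		printf("  ");

	printf("%8lld us |", duration);
}

int main(void)
{
	print_duration(FILL_FULL); printf("  nested_call();\n");
	print_duration(1234);      printf("  fast_leaf();\n");
	print_duration(150000);    printf("  slow_leaf();\n");
	return 0;
}
```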
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index c77424be284d..667aa8cc0cfc 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter) | |||
226 | } | 226 | } |
227 | 227 | ||
228 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ | 228 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ |
229 | TRACE_GRAPH_PRINT_PROC) | 229 | TRACE_GRAPH_PRINT_PROC | \ |
230 | TRACE_GRAPH_PRINT_ABS_TIME | \ | ||
231 | TRACE_GRAPH_PRINT_DURATION) | ||
230 | 232 | ||
231 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | 233 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) |
232 | { | 234 | { |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 27d13b36b8be..5fb3697bf0e5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref) | |||
343 | DEFINE_FETCH_deref(string) | 343 | DEFINE_FETCH_deref(string) |
344 | DEFINE_FETCH_deref(string_size) | 344 | DEFINE_FETCH_deref(string_size) |
345 | 345 | ||
346 | static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) | ||
347 | { | ||
348 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | ||
349 | update_deref_fetch_param(data->orig.data); | ||
350 | else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) | ||
351 | update_symbol_cache(data->orig.data); | ||
352 | } | ||
353 | |||
346 | static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) | 354 | static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) |
347 | { | 355 | { |
348 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | 356 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) |
@@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield) | |||
377 | #define fetch_bitfield_string_size NULL | 385 | #define fetch_bitfield_string_size NULL |
378 | 386 | ||
379 | static __kprobes void | 387 | static __kprobes void |
388 | update_bitfield_fetch_param(struct bitfield_fetch_param *data) | ||
389 | { | ||
390 | /* | ||
391 | * Don't check the bitfield itself, because this must be the | ||
392 | * last fetch function. | ||
393 | */ | ||
394 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | ||
395 | update_deref_fetch_param(data->orig.data); | ||
396 | else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) | ||
397 | update_symbol_cache(data->orig.data); | ||
398 | } | ||
399 | |||
400 | static __kprobes void | ||
380 | free_bitfield_fetch_param(struct bitfield_fetch_param *data) | 401 | free_bitfield_fetch_param(struct bitfield_fetch_param *data) |
381 | { | 402 | { |
382 | /* | 403 | /* |
@@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data) | |||
389 | free_symbol_cache(data->orig.data); | 410 | free_symbol_cache(data->orig.data); |
390 | kfree(data); | 411 | kfree(data); |
391 | } | 412 | } |
413 | |||
392 | /* Default (unsigned long) fetch type */ | 414 | /* Default (unsigned long) fetch type */ |
393 | #define __DEFAULT_FETCH_TYPE(t) u##t | 415 | #define __DEFAULT_FETCH_TYPE(t) u##t |
394 | #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) | 416 | #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) |
@@ -536,6 +558,7 @@ struct probe_arg { | |||
536 | /* Flags for trace_probe */ | 558 | /* Flags for trace_probe */ |
537 | #define TP_FLAG_TRACE 1 | 559 | #define TP_FLAG_TRACE 1 |
538 | #define TP_FLAG_PROFILE 2 | 560 | #define TP_FLAG_PROFILE 2 |
561 | #define TP_FLAG_REGISTERED 4 | ||
539 | 562 | ||
540 | struct trace_probe { | 563 | struct trace_probe { |
541 | struct list_head list; | 564 | struct list_head list; |
@@ -555,16 +578,49 @@ struct trace_probe { | |||
555 | (sizeof(struct probe_arg) * (n))) | 578 | (sizeof(struct probe_arg) * (n))) |
556 | 579 | ||
557 | 580 | ||
558 | static __kprobes int probe_is_return(struct trace_probe *tp) | 581 | static __kprobes int trace_probe_is_return(struct trace_probe *tp) |
559 | { | 582 | { |
560 | return tp->rp.handler != NULL; | 583 | return tp->rp.handler != NULL; |
561 | } | 584 | } |
562 | 585 | ||
563 | static __kprobes const char *probe_symbol(struct trace_probe *tp) | 586 | static __kprobes const char *trace_probe_symbol(struct trace_probe *tp) |
564 | { | 587 | { |
565 | return tp->symbol ? tp->symbol : "unknown"; | 588 | return tp->symbol ? tp->symbol : "unknown"; |
566 | } | 589 | } |
567 | 590 | ||
591 | static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp) | ||
592 | { | ||
593 | return tp->rp.kp.offset; | ||
594 | } | ||
595 | |||
596 | static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp) | ||
597 | { | ||
598 | return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); | ||
599 | } | ||
600 | |||
601 | static __kprobes bool trace_probe_is_registered(struct trace_probe *tp) | ||
602 | { | ||
603 | return !!(tp->flags & TP_FLAG_REGISTERED); | ||
604 | } | ||
605 | |||
606 | static __kprobes bool trace_probe_has_gone(struct trace_probe *tp) | ||
607 | { | ||
608 | return !!(kprobe_gone(&tp->rp.kp)); | ||
609 | } | ||
610 | |||
611 | static __kprobes bool trace_probe_within_module(struct trace_probe *tp, | ||
612 | struct module *mod) | ||
613 | { | ||
614 | int len = strlen(mod->name); | ||
615 | const char *name = trace_probe_symbol(tp); | ||
616 | return strncmp(mod->name, name, len) == 0 && name[len] == ':'; | ||
617 | } | ||
618 | |||
619 | static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) | ||
620 | { | ||
621 | return !!strchr(trace_probe_symbol(tp), ':'); | ||
622 | } | ||
623 | |||
568 | static int register_probe_event(struct trace_probe *tp); | 624 | static int register_probe_event(struct trace_probe *tp); |
569 | static void unregister_probe_event(struct trace_probe *tp); | 625 | static void unregister_probe_event(struct trace_probe *tp); |
570 | 626 | ||
@@ -646,6 +702,16 @@ error: | |||
646 | return ERR_PTR(ret); | 702 | return ERR_PTR(ret); |
647 | } | 703 | } |
648 | 704 | ||
705 | static void update_probe_arg(struct probe_arg *arg) | ||
706 | { | ||
707 | if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) | ||
708 | update_bitfield_fetch_param(arg->fetch.data); | ||
709 | else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) | ||
710 | update_deref_fetch_param(arg->fetch.data); | ||
711 | else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) | ||
712 | update_symbol_cache(arg->fetch.data); | ||
713 | } | ||
714 | |||
649 | static void free_probe_arg(struct probe_arg *arg) | 715 | static void free_probe_arg(struct probe_arg *arg) |
650 | { | 716 | { |
651 | if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) | 717 | if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) |
@@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp) | |||
671 | kfree(tp); | 737 | kfree(tp); |
672 | } | 738 | } |
673 | 739 | ||
674 | static struct trace_probe *find_probe_event(const char *event, | 740 | static struct trace_probe *find_trace_probe(const char *event, |
675 | const char *group) | 741 | const char *group) |
676 | { | 742 | { |
677 | struct trace_probe *tp; | 743 | struct trace_probe *tp; |
@@ -683,13 +749,96 @@ static struct trace_probe *find_probe_event(const char *event, | |||
683 | return NULL; | 749 | return NULL; |
684 | } | 750 | } |
685 | 751 | ||
752 | /* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ | ||
753 | static int enable_trace_probe(struct trace_probe *tp, int flag) | ||
754 | { | ||
755 | int ret = 0; | ||
756 | |||
757 | tp->flags |= flag; | ||
758 | if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && | ||
759 | !trace_probe_has_gone(tp)) { | ||
760 | if (trace_probe_is_return(tp)) | ||
761 | ret = enable_kretprobe(&tp->rp); | ||
762 | else | ||
763 | ret = enable_kprobe(&tp->rp.kp); | ||
764 | } | ||
765 | |||
766 | return ret; | ||
767 | } | ||
768 | |||
769 | /* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ | ||
770 | static void disable_trace_probe(struct trace_probe *tp, int flag) | ||
771 | { | ||
772 | tp->flags &= ~flag; | ||
773 | if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { | ||
774 | if (trace_probe_is_return(tp)) | ||
775 | disable_kretprobe(&tp->rp); | ||
776 | else | ||
777 | disable_kprobe(&tp->rp.kp); | ||
778 | } | ||
779 | } | ||
780 | |||
781 | /* Internal register function - just handle k*probes and flags */ | ||
782 | static int __register_trace_probe(struct trace_probe *tp) | ||
783 | { | ||
784 | int i, ret; | ||
785 | |||
786 | if (trace_probe_is_registered(tp)) | ||
787 | return -EINVAL; | ||
788 | |||
789 | for (i = 0; i < tp->nr_args; i++) | ||
790 | update_probe_arg(&tp->args[i]); | ||
791 | |||
792 | /* Set/clear disabled flag according to tp->flag */ | ||
793 | if (trace_probe_is_enabled(tp)) | ||
794 | tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; | ||
795 | else | ||
796 | tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; | ||
797 | |||
798 | if (trace_probe_is_return(tp)) | ||
799 | ret = register_kretprobe(&tp->rp); | ||
800 | else | ||
801 | ret = register_kprobe(&tp->rp.kp); | ||
802 | |||
803 | if (ret == 0) | ||
804 | tp->flags |= TP_FLAG_REGISTERED; | ||
805 | else { | ||
806 | pr_warning("Could not insert probe at %s+%lu: %d\n", | ||
807 | trace_probe_symbol(tp), trace_probe_offset(tp), ret); | ||
808 | if (ret == -ENOENT && trace_probe_is_on_module(tp)) { | ||
809 | pr_warning("This probe might be able to register after " | ||
810 | "target module is loaded. Continue.\n"); | ||
811 | ret = 0; | ||
812 | } else if (ret == -EILSEQ) { | ||
813 | pr_warning("Probing address(0x%p) is not an " | ||
814 | "instruction boundary.\n", | ||
815 | tp->rp.kp.addr); | ||
816 | ret = -EINVAL; | ||
817 | } | ||
818 | } | ||
819 | |||
820 | return ret; | ||
821 | } | ||
822 | |||
823 | /* Internal unregister function - just handle k*probes and flags */ | ||
824 | static void __unregister_trace_probe(struct trace_probe *tp) | ||
825 | { | ||
826 | if (trace_probe_is_registered(tp)) { | ||
827 | if (trace_probe_is_return(tp)) | ||
828 | unregister_kretprobe(&tp->rp); | ||
829 | else | ||
830 | unregister_kprobe(&tp->rp.kp); | ||
831 | tp->flags &= ~TP_FLAG_REGISTERED; | ||
832 | /* Cleanup kprobe for reuse */ | ||
833 | if (tp->rp.kp.symbol_name) | ||
834 | tp->rp.kp.addr = NULL; | ||
835 | } | ||
836 | } | ||
837 | |||
686 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ | 838 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ |
687 | static void unregister_trace_probe(struct trace_probe *tp) | 839 | static void unregister_trace_probe(struct trace_probe *tp) |
688 | { | 840 | { |
689 | if (probe_is_return(tp)) | 841 | __unregister_trace_probe(tp); |
690 | unregister_kretprobe(&tp->rp); | ||
691 | else | ||
692 | unregister_kprobe(&tp->rp.kp); | ||
693 | list_del(&tp->list); | 842 | list_del(&tp->list); |
694 | unregister_probe_event(tp); | 843 | unregister_probe_event(tp); |
695 | } | 844 | } |
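Editor's note: the enable_trace_probe()/disable_trace_probe() pair above replaces the previously separate trace and perf enable paths. Each consumer owns one flag bit (TP_FLAG_TRACE or TP_FLAG_PROFILE); the underlying k*probe is armed while any bit is set and disarmed when the last one clears, and only touched at all if the probe is actually registered. A minimal sketch of that flag-based arming, with the TP_FLAG_REGISTERED and kprobe_gone() checks deliberately omitted (names are illustrative):

```c
/* Sketch of flag-based arming: trace and perf each own a flag bit; the
 * probe is armed while any bit is set and disarmed only when both clear. */
#include <stdbool.h>
#include <stdio.h>

#define TP_FLAG_TRACE   1
#define TP_FLAG_PROFILE 2

struct probe {
	unsigned int flags;
	bool armed;
};

static void enable_probe(struct probe *p, unsigned int flag)
{
	p->flags |= flag;
	if (!p->armed && p->flags) {
		p->armed = true;
		printf("probe armed\n");
	}
}

static void disable_probe(struct probe *p, unsigned int flag)
{
	p->flags &= ~flag;
	if (p->armed && !p->flags) {
		p->armed = false;
		printf("probe disarmed\n");
	}
}

int main(void)
{
	struct probe p = { 0, false };

	enable_probe(&p, TP_FLAG_TRACE);	/* ftrace user arms it      */
	enable_probe(&p, TP_FLAG_PROFILE);	/* perf user shares it      */
	disable_probe(&p, TP_FLAG_TRACE);	/* still armed for perf     */
	disable_probe(&p, TP_FLAG_PROFILE);	/* last user disarms it     */
	return 0;
}
```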
@@ -702,41 +851,65 @@ static int register_trace_probe(struct trace_probe *tp) | |||
702 | 851 | ||
703 | mutex_lock(&probe_lock); | 852 | mutex_lock(&probe_lock); |
704 | 853 | ||
705 | /* register as an event */ | 854 | /* Delete old (same name) event if exist */ |
706 | old_tp = find_probe_event(tp->call.name, tp->call.class->system); | 855 | old_tp = find_trace_probe(tp->call.name, tp->call.class->system); |
707 | if (old_tp) { | 856 | if (old_tp) { |
708 | /* delete old event */ | ||
709 | unregister_trace_probe(old_tp); | 857 | unregister_trace_probe(old_tp); |
710 | free_trace_probe(old_tp); | 858 | free_trace_probe(old_tp); |
711 | } | 859 | } |
860 | |||
861 | /* Register new event */ | ||
712 | ret = register_probe_event(tp); | 862 | ret = register_probe_event(tp); |
713 | if (ret) { | 863 | if (ret) { |
714 | pr_warning("Failed to register probe event(%d)\n", ret); | 864 | pr_warning("Failed to register probe event(%d)\n", ret); |
715 | goto end; | 865 | goto end; |
716 | } | 866 | } |
717 | 867 | ||
718 | tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; | 868 | /* Register k*probe */ |
719 | if (probe_is_return(tp)) | 869 | ret = __register_trace_probe(tp); |
720 | ret = register_kretprobe(&tp->rp); | 870 | if (ret < 0) |
721 | else | ||
722 | ret = register_kprobe(&tp->rp.kp); | ||
723 | |||
724 | if (ret) { | ||
725 | pr_warning("Could not insert probe(%d)\n", ret); | ||
726 | if (ret == -EILSEQ) { | ||
727 | pr_warning("Probing address(0x%p) is not an " | ||
728 | "instruction boundary.\n", | ||
729 | tp->rp.kp.addr); | ||
730 | ret = -EINVAL; | ||
731 | } | ||
732 | unregister_probe_event(tp); | 871 | unregister_probe_event(tp); |
733 | } else | 872 | else |
734 | list_add_tail(&tp->list, &probe_list); | 873 | list_add_tail(&tp->list, &probe_list); |
874 | |||
735 | end: | 875 | end: |
736 | mutex_unlock(&probe_lock); | 876 | mutex_unlock(&probe_lock); |
737 | return ret; | 877 | return ret; |
738 | } | 878 | } |
739 | 879 | ||
880 | /* Module notifier callback, checking events on the module */ | ||
881 | static int trace_probe_module_callback(struct notifier_block *nb, | ||
882 | unsigned long val, void *data) | ||
883 | { | ||
884 | struct module *mod = data; | ||
885 | struct trace_probe *tp; | ||
886 | int ret; | ||
887 | |||
888 | if (val != MODULE_STATE_COMING) | ||
889 | return NOTIFY_DONE; | ||
890 | |||
891 | /* Update probes on coming module */ | ||
892 | mutex_lock(&probe_lock); | ||
893 | list_for_each_entry(tp, &probe_list, list) { | ||
894 | if (trace_probe_within_module(tp, mod)) { | ||
895 | __unregister_trace_probe(tp); | ||
896 | ret = __register_trace_probe(tp); | ||
897 | if (ret) | ||
898 | pr_warning("Failed to re-register probe %s on " | ||
899 | "%s: %d\n", | ||
900 | tp->call.name, mod->name, ret); | ||
901 | } | ||
902 | } | ||
903 | mutex_unlock(&probe_lock); | ||
904 | |||
905 | return NOTIFY_DONE; | ||
906 | } | ||
907 | |||
908 | static struct notifier_block trace_probe_module_nb = { | ||
909 | .notifier_call = trace_probe_module_callback, | ||
910 | .priority = 1 /* Invoked after kprobe module callback */ | ||
911 | }; | ||
912 | |||
740 | /* Split symbol and offset. */ | 913 | /* Split symbol and offset. */ |
741 | static int split_symbol_offset(char *symbol, unsigned long *offset) | 914 | static int split_symbol_offset(char *symbol, unsigned long *offset) |
742 | { | 915 | { |
@@ -962,8 +1135,8 @@ static int create_trace_probe(int argc, char **argv) | |||
962 | { | 1135 | { |
963 | /* | 1136 | /* |
964 | * Argument syntax: | 1137 | * Argument syntax: |
965 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] | 1138 | * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] |
966 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] | 1139 | * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] |
967 | * Fetch args: | 1140 | * Fetch args: |
968 | * $retval : fetch return value | 1141 | * $retval : fetch return value |
969 | * $stack : fetch stack address | 1142 | * $stack : fetch stack address |
@@ -1025,7 +1198,7 @@ static int create_trace_probe(int argc, char **argv) | |||
1025 | return -EINVAL; | 1198 | return -EINVAL; |
1026 | } | 1199 | } |
1027 | mutex_lock(&probe_lock); | 1200 | mutex_lock(&probe_lock); |
1028 | tp = find_probe_event(event, group); | 1201 | tp = find_trace_probe(event, group); |
1029 | if (!tp) { | 1202 | if (!tp) { |
1030 | mutex_unlock(&probe_lock); | 1203 | mutex_unlock(&probe_lock); |
1031 | pr_info("Event %s/%s doesn't exist.\n", group, event); | 1204 | pr_info("Event %s/%s doesn't exist.\n", group, event); |
@@ -1144,7 +1317,7 @@ error: | |||
1144 | return ret; | 1317 | return ret; |
1145 | } | 1318 | } |
1146 | 1319 | ||
1147 | static void cleanup_all_probes(void) | 1320 | static void release_all_trace_probes(void) |
1148 | { | 1321 | { |
1149 | struct trace_probe *tp; | 1322 | struct trace_probe *tp; |
1150 | 1323 | ||
@@ -1158,7 +1331,6 @@ static void cleanup_all_probes(void) | |||
1158 | mutex_unlock(&probe_lock); | 1331 | mutex_unlock(&probe_lock); |
1159 | } | 1332 | } |
1160 | 1333 | ||
1161 | |||
1162 | /* Probes listing interfaces */ | 1334 | /* Probes listing interfaces */ |
1163 | static void *probes_seq_start(struct seq_file *m, loff_t *pos) | 1335 | static void *probes_seq_start(struct seq_file *m, loff_t *pos) |
1164 | { | 1336 | { |
@@ -1181,15 +1353,16 @@ static int probes_seq_show(struct seq_file *m, void *v) | |||
1181 | struct trace_probe *tp = v; | 1353 | struct trace_probe *tp = v; |
1182 | int i; | 1354 | int i; |
1183 | 1355 | ||
1184 | seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); | 1356 | seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p'); |
1185 | seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); | 1357 | seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); |
1186 | 1358 | ||
1187 | if (!tp->symbol) | 1359 | if (!tp->symbol) |
1188 | seq_printf(m, " 0x%p", tp->rp.kp.addr); | 1360 | seq_printf(m, " 0x%p", tp->rp.kp.addr); |
1189 | else if (tp->rp.kp.offset) | 1361 | else if (tp->rp.kp.offset) |
1190 | seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); | 1362 | seq_printf(m, " %s+%u", trace_probe_symbol(tp), |
1363 | tp->rp.kp.offset); | ||
1191 | else | 1364 | else |
1192 | seq_printf(m, " %s", probe_symbol(tp)); | 1365 | seq_printf(m, " %s", trace_probe_symbol(tp)); |
1193 | 1366 | ||
1194 | for (i = 0; i < tp->nr_args; i++) | 1367 | for (i = 0; i < tp->nr_args; i++) |
1195 | seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); | 1368 | seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); |
@@ -1209,7 +1382,7 @@ static int probes_open(struct inode *inode, struct file *file) | |||
1209 | { | 1382 | { |
1210 | if ((file->f_mode & FMODE_WRITE) && | 1383 | if ((file->f_mode & FMODE_WRITE) && |
1211 | (file->f_flags & O_TRUNC)) | 1384 | (file->f_flags & O_TRUNC)) |
1212 | cleanup_all_probes(); | 1385 | release_all_trace_probes(); |
1213 | 1386 | ||
1214 | return seq_open(file, &probes_seq_op); | 1387 | return seq_open(file, &probes_seq_op); |
1215 | } | 1388 | } |
@@ -1397,7 +1570,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | |||
1397 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1570 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
1398 | 1571 | ||
1399 | if (!filter_current_check_discard(buffer, call, entry, event)) | 1572 | if (!filter_current_check_discard(buffer, call, entry, event)) |
1400 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 1573 | trace_nowake_buffer_unlock_commit_regs(buffer, event, |
1574 | irq_flags, pc, regs); | ||
1401 | } | 1575 | } |
1402 | 1576 | ||
1403 | /* Kretprobe handler */ | 1577 | /* Kretprobe handler */ |
@@ -1429,7 +1603,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, | |||
1429 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1603 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
1430 | 1604 | ||
1431 | if (!filter_current_check_discard(buffer, call, entry, event)) | 1605 | if (!filter_current_check_discard(buffer, call, entry, event)) |
1432 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 1606 | trace_nowake_buffer_unlock_commit_regs(buffer, event, |
1607 | irq_flags, pc, regs); | ||
1433 | } | 1608 | } |
1434 | 1609 | ||
1435 | /* Event entry printers */ | 1610 | /* Event entry printers */ |
@@ -1511,30 +1686,6 @@ partial: | |||
1511 | return TRACE_TYPE_PARTIAL_LINE; | 1686 | return TRACE_TYPE_PARTIAL_LINE; |
1512 | } | 1687 | } |
1513 | 1688 | ||
1514 | static int probe_event_enable(struct ftrace_event_call *call) | ||
1515 | { | ||
1516 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1517 | |||
1518 | tp->flags |= TP_FLAG_TRACE; | ||
1519 | if (probe_is_return(tp)) | ||
1520 | return enable_kretprobe(&tp->rp); | ||
1521 | else | ||
1522 | return enable_kprobe(&tp->rp.kp); | ||
1523 | } | ||
1524 | |||
1525 | static void probe_event_disable(struct ftrace_event_call *call) | ||
1526 | { | ||
1527 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1528 | |||
1529 | tp->flags &= ~TP_FLAG_TRACE; | ||
1530 | if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { | ||
1531 | if (probe_is_return(tp)) | ||
1532 | disable_kretprobe(&tp->rp); | ||
1533 | else | ||
1534 | disable_kprobe(&tp->rp.kp); | ||
1535 | } | ||
1536 | } | ||
1537 | |||
1538 | #undef DEFINE_FIELD | 1689 | #undef DEFINE_FIELD |
1539 | #define DEFINE_FIELD(type, item, name, is_signed) \ | 1690 | #define DEFINE_FIELD(type, item, name, is_signed) \ |
1540 | do { \ | 1691 | do { \ |
@@ -1596,7 +1747,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) | |||
1596 | 1747 | ||
1597 | const char *fmt, *arg; | 1748 | const char *fmt, *arg; |
1598 | 1749 | ||
1599 | if (!probe_is_return(tp)) { | 1750 | if (!trace_probe_is_return(tp)) { |
1600 | fmt = "(%lx)"; | 1751 | fmt = "(%lx)"; |
1601 | arg = "REC->" FIELD_STRING_IP; | 1752 | arg = "REC->" FIELD_STRING_IP; |
1602 | } else { | 1753 | } else { |
@@ -1713,49 +1864,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1713 | head = this_cpu_ptr(call->perf_events); | 1864 | head = this_cpu_ptr(call->perf_events); |
1714 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); | 1865 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); |
1715 | } | 1866 | } |
1716 | |||
1717 | static int probe_perf_enable(struct ftrace_event_call *call) | ||
1718 | { | ||
1719 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1720 | |||
1721 | tp->flags |= TP_FLAG_PROFILE; | ||
1722 | |||
1723 | if (probe_is_return(tp)) | ||
1724 | return enable_kretprobe(&tp->rp); | ||
1725 | else | ||
1726 | return enable_kprobe(&tp->rp.kp); | ||
1727 | } | ||
1728 | |||
1729 | static void probe_perf_disable(struct ftrace_event_call *call) | ||
1730 | { | ||
1731 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1732 | |||
1733 | tp->flags &= ~TP_FLAG_PROFILE; | ||
1734 | |||
1735 | if (!(tp->flags & TP_FLAG_TRACE)) { | ||
1736 | if (probe_is_return(tp)) | ||
1737 | disable_kretprobe(&tp->rp); | ||
1738 | else | ||
1739 | disable_kprobe(&tp->rp.kp); | ||
1740 | } | ||
1741 | } | ||
1742 | #endif /* CONFIG_PERF_EVENTS */ | 1867 | #endif /* CONFIG_PERF_EVENTS */ |
1743 | 1868 | ||
1744 | static __kprobes | 1869 | static __kprobes |
1745 | int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) | 1870 | int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) |
1746 | { | 1871 | { |
1872 | struct trace_probe *tp = (struct trace_probe *)event->data; | ||
1873 | |||
1747 | switch (type) { | 1874 | switch (type) { |
1748 | case TRACE_REG_REGISTER: | 1875 | case TRACE_REG_REGISTER: |
1749 | return probe_event_enable(event); | 1876 | return enable_trace_probe(tp, TP_FLAG_TRACE); |
1750 | case TRACE_REG_UNREGISTER: | 1877 | case TRACE_REG_UNREGISTER: |
1751 | probe_event_disable(event); | 1878 | disable_trace_probe(tp, TP_FLAG_TRACE); |
1752 | return 0; | 1879 | return 0; |
1753 | 1880 | ||
1754 | #ifdef CONFIG_PERF_EVENTS | 1881 | #ifdef CONFIG_PERF_EVENTS |
1755 | case TRACE_REG_PERF_REGISTER: | 1882 | case TRACE_REG_PERF_REGISTER: |
1756 | return probe_perf_enable(event); | 1883 | return enable_trace_probe(tp, TP_FLAG_PROFILE); |
1757 | case TRACE_REG_PERF_UNREGISTER: | 1884 | case TRACE_REG_PERF_UNREGISTER: |
1758 | probe_perf_disable(event); | 1885 | disable_trace_probe(tp, TP_FLAG_PROFILE); |
1759 | return 0; | 1886 | return 0; |
1760 | #endif | 1887 | #endif |
1761 | } | 1888 | } |
@@ -1805,7 +1932,7 @@ static int register_probe_event(struct trace_probe *tp) | |||
1805 | 1932 | ||
1806 | /* Initialize ftrace_event_call */ | 1933 | /* Initialize ftrace_event_call */ |
1807 | INIT_LIST_HEAD(&call->class->fields); | 1934 | INIT_LIST_HEAD(&call->class->fields); |
1808 | if (probe_is_return(tp)) { | 1935 | if (trace_probe_is_return(tp)) { |
1809 | call->event.funcs = &kretprobe_funcs; | 1936 | call->event.funcs = &kretprobe_funcs; |
1810 | call->class->define_fields = kretprobe_event_define_fields; | 1937 | call->class->define_fields = kretprobe_event_define_fields; |
1811 | } else { | 1938 | } else { |
@@ -1844,6 +1971,9 @@ static __init int init_kprobe_trace(void) | |||
1844 | struct dentry *d_tracer; | 1971 | struct dentry *d_tracer; |
1845 | struct dentry *entry; | 1972 | struct dentry *entry; |
1846 | 1973 | ||
1974 | if (register_module_notifier(&trace_probe_module_nb)) | ||
1975 | return -EINVAL; | ||
1976 | |||
1847 | d_tracer = tracing_init_dentry(); | 1977 | d_tracer = tracing_init_dentry(); |
1848 | if (!d_tracer) | 1978 | if (!d_tracer) |
1849 | return 0; | 1979 | return 0; |
@@ -1897,12 +2027,12 @@ static __init int kprobe_trace_self_tests_init(void) | |||
1897 | warn++; | 2027 | warn++; |
1898 | } else { | 2028 | } else { |
1899 | /* Enable trace point */ | 2029 | /* Enable trace point */ |
1900 | tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); | 2030 | tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); |
1901 | if (WARN_ON_ONCE(tp == NULL)) { | 2031 | if (WARN_ON_ONCE(tp == NULL)) { |
1902 | pr_warning("error on getting new probe.\n"); | 2032 | pr_warning("error on getting new probe.\n"); |
1903 | warn++; | 2033 | warn++; |
1904 | } else | 2034 | } else |
1905 | probe_event_enable(&tp->call); | 2035 | enable_trace_probe(tp, TP_FLAG_TRACE); |
1906 | } | 2036 | } |
1907 | 2037 | ||
1908 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " | 2038 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " |
@@ -1912,12 +2042,12 @@ static __init int kprobe_trace_self_tests_init(void) | |||
1912 | warn++; | 2042 | warn++; |
1913 | } else { | 2043 | } else { |
1914 | /* Enable trace point */ | 2044 | /* Enable trace point */ |
1915 | tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); | 2045 | tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); |
1916 | if (WARN_ON_ONCE(tp == NULL)) { | 2046 | if (WARN_ON_ONCE(tp == NULL)) { |
1917 | pr_warning("error on getting new probe.\n"); | 2047 | pr_warning("error on getting new probe.\n"); |
1918 | warn++; | 2048 | warn++; |
1919 | } else | 2049 | } else |
1920 | probe_event_enable(&tp->call); | 2050 | enable_trace_probe(tp, TP_FLAG_TRACE); |
1921 | } | 2051 | } |
1922 | 2052 | ||
1923 | if (warn) | 2053 | if (warn) |
@@ -1938,7 +2068,7 @@ static __init int kprobe_trace_self_tests_init(void) | |||
1938 | } | 2068 | } |
1939 | 2069 | ||
1940 | end: | 2070 | end: |
1941 | cleanup_all_probes(); | 2071 | release_all_trace_probes(); |
1942 | if (warn) | 2072 | if (warn) |
1943 | pr_cont("NG: Some tests are failed. Please check them.\n"); | 2073 | pr_cont("NG: Some tests are failed. Please check them.\n"); |
1944 | else | 2074 | else |
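Editor's note: the rest of the trace_kprobe.c changes above let a probe be defined against a module that is not loaded yet. Registration failures with -ENOENT on a MOD:SYM target are tolerated, and a module notifier walks the probe list when a module reaches the "coming" state and re-registers any probe whose symbol names that module. A plain-C analogy of that retry-on-module-load idea (the list, names and callbacks below are illustrative, not the kprobes or notifier API):

```c
/* Sketch of the idea: probes on not-yet-loaded modules stay unregistered
 * until a "module coming" event retries them. Not the kernel API. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct probe {
	const char *symbol;	/* "module:function" or plain "function" */
	bool registered;
};

static struct probe probes[] = {
	{ "vfs_read",          false },
	{ "ext4:ext4_readdir", false },
};

static bool probe_within_module(const struct probe *p, const char *mod)
{
	size_t len = strlen(mod);

	return strncmp(p->symbol, mod, len) == 0 && p->symbol[len] == ':';
}

static void register_probe(struct probe *p)
{
	p->registered = true;
	printf("registered probe on %s\n", p->symbol);
}

/* Called when a module transitions to its "coming" state. */
static void module_coming(const char *mod)
{
	for (size_t i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
		if (!probes[i].registered && probe_within_module(&probes[i], mod))
			register_probe(&probes[i]);
}

int main(void)
{
	register_probe(&probes[0]);	/* in-kernel symbol registers now     */
	module_coming("ext4");		/* module probe is retried at load    */
	return 0;
}
```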
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e37de492a9e1..51999309a6cf 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
1107 | { | 1107 | { |
1108 | struct stack_entry *field; | 1108 | struct stack_entry *field; |
1109 | struct trace_seq *s = &iter->seq; | 1109 | struct trace_seq *s = &iter->seq; |
1110 | int i; | 1110 | unsigned long *p; |
1111 | unsigned long *end; | ||
1111 | 1112 | ||
1112 | trace_assign_type(field, iter->ent); | 1113 | trace_assign_type(field, iter->ent); |
1114 | end = (unsigned long *)((long)iter->ent + iter->ent_size); | ||
1113 | 1115 | ||
1114 | if (!trace_seq_puts(s, "<stack trace>\n")) | 1116 | if (!trace_seq_puts(s, "<stack trace>\n")) |
1115 | goto partial; | 1117 | goto partial; |
1116 | for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { | 1118 | |
1117 | if (!field->caller[i] || (field->caller[i] == ULONG_MAX)) | 1119 | for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) { |
1118 | break; | ||
1119 | if (!trace_seq_puts(s, " => ")) | 1120 | if (!trace_seq_puts(s, " => ")) |
1120 | goto partial; | 1121 | goto partial; |
1121 | 1122 | ||
1122 | if (!seq_print_ip_sym(s, field->caller[i], flags)) | 1123 | if (!seq_print_ip_sym(s, *p, flags)) |
1123 | goto partial; | 1124 | goto partial; |
1124 | if (!trace_seq_puts(s, "\n")) | 1125 | if (!trace_seq_puts(s, "\n")) |
1125 | goto partial; | 1126 | goto partial; |
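Editor's note: the trace_output.c hunk above is the reader side of the dynamic stack array: instead of looping over a fixed FTRACE_STACK_ENTRIES bound, the printer walks the variable-length caller array up to the entry size recorded in the ring buffer, stopping early at a ULONG_MAX terminator. A standalone sketch of that bounded walk, with an illustrative record layout:

```c
/* Sketch of the new stack-trace printer loop: walk until the record ends
 * or a ULONG_MAX terminator is hit. Plain C, not the trace_seq API. */
#include <limits.h>
#include <stdio.h>

static void print_stack(const unsigned long *caller, size_t record_bytes)
{
	const unsigned long *end = caller + record_bytes / sizeof(*caller);
	const unsigned long *p;

	printf("<stack trace>\n");
	for (p = caller; p < end && *p != ULONG_MAX; p++)
		printf(" => %#lx\n", *p);
}

int main(void)
{
	unsigned long caller[] = { 0xc0ffee, 0xdeadbeef, ULONG_MAX, 0 };

	print_stack(caller, sizeof(caller));
	return 0;
}
```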
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index f029dd4fd2ca..e4a70c0c71b6 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter) | |||
227 | graph_trace_close(iter); | 227 | graph_trace_close(iter); |
228 | } | 228 | } |
229 | 229 | ||
230 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) | 230 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \ |
231 | TRACE_GRAPH_PRINT_ABS_TIME | \ | ||
232 | TRACE_GRAPH_PRINT_DURATION) | ||
231 | 233 | ||
232 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | 234 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) |
233 | { | 235 | { |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b0b53b8e4c25..77575b386d97 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
156 | { | 156 | { |
157 | long *ptr = filp->private_data; | 157 | long *ptr = filp->private_data; |
158 | unsigned long val, flags; | 158 | unsigned long val, flags; |
159 | char buf[64]; | ||
160 | int ret; | 159 | int ret; |
161 | int cpu; | 160 | int cpu; |
162 | 161 | ||
163 | if (count >= sizeof(buf)) | 162 | ret = kstrtoul_from_user(ubuf, count, 10, &val); |
164 | return -EINVAL; | 163 | if (ret) |
165 | |||
166 | if (copy_from_user(&buf, ubuf, count)) | ||
167 | return -EFAULT; | ||
168 | |||
169 | buf[count] = 0; | ||
170 | |||
171 | ret = strict_strtoul(buf, 10, &val); | ||
172 | if (ret < 0) | ||
173 | return ret; | 164 | return ret; |
174 | 165 | ||
175 | local_irq_save(flags); | 166 | local_irq_save(flags); |
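Editor's note: several write handlers in this merge (event_enable_write, system_enable_write and stack_max_size_write above) drop their local 64-byte buffer plus copy_from_user()/strict_strtoul() sequence in favor of the single kstrtoul_from_user() helper. A userspace analog of what that helper bundles up, with strtoul_from_buf() as a hypothetical stand-in for the kernel function:

```c
/* Sketch of the contract: bounded copy, NUL-terminate, parse, report an
 * error code. strtoul_from_buf() is a made-up userspace stand-in. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int strtoul_from_buf(const char *ubuf, size_t cnt, int base,
			    unsigned long *val)
{
	char buf[64];
	char *end;

	if (cnt >= sizeof(buf))
		return -EINVAL;
	memcpy(buf, ubuf, cnt);		/* copy_from_user() in the kernel */
	buf[cnt] = '\0';

	errno = 0;
	*val = strtoul(buf, &end, base);
	if (errno || end == buf)
		return -EINVAL;
	return 0;
}

int main(void)
{
	const char *input = "1\n";	/* what `echo 1 > enable` would write */
	unsigned long val;

	if (strtoul_from_buf(input, strlen(input), 10, &val) == 0)
		printf("parsed %lu\n", val);
	return 0;
}
```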
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 3d0c56ad4792..36491cd5b7d4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -200,6 +200,7 @@ static int is_softlockup(unsigned long touch_ts) | |||
200 | } | 200 | } |
201 | 201 | ||
202 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 202 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
203 | |||
203 | static struct perf_event_attr wd_hw_attr = { | 204 | static struct perf_event_attr wd_hw_attr = { |
204 | .type = PERF_TYPE_HARDWARE, | 205 | .type = PERF_TYPE_HARDWARE, |
205 | .config = PERF_COUNT_HW_CPU_CYCLES, | 206 | .config = PERF_COUNT_HW_CPU_CYCLES, |
@@ -209,7 +210,7 @@ static struct perf_event_attr wd_hw_attr = { | |||
209 | }; | 210 | }; |
210 | 211 | ||
211 | /* Callback function for perf event subsystem */ | 212 | /* Callback function for perf event subsystem */ |
212 | static void watchdog_overflow_callback(struct perf_event *event, int nmi, | 213 | static void watchdog_overflow_callback(struct perf_event *event, |
213 | struct perf_sample_data *data, | 214 | struct perf_sample_data *data, |
214 | struct pt_regs *regs) | 215 | struct pt_regs *regs) |
215 | { | 216 | { |
@@ -368,10 +369,11 @@ static int watchdog_nmi_enable(int cpu) | |||
368 | if (event != NULL) | 369 | if (event != NULL) |
369 | goto out_enable; | 370 | goto out_enable; |
370 | 371 | ||
371 | /* Try to register using hardware perf events */ | ||
372 | wd_attr = &wd_hw_attr; | 372 | wd_attr = &wd_hw_attr; |
373 | wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); | 373 | wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); |
374 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); | 374 | |
375 | /* Try to register using hardware perf events */ | ||
376 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | ||
375 | if (!IS_ERR(event)) { | 377 | if (!IS_ERR(event)) { |
376 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); | 378 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); |
377 | goto out_save; | 379 | goto out_save; |