diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-07-23 08:46:33 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-08-02 07:47:56 -0400 |
commit | 9f498cc5be7e013d8d6e4c616980ed0ffc8680d2 (patch) | |
tree | 25ef7a52d61ffd336c73b42dcf770424beb19cf3 /kernel | |
parent | e53c0994709166b111fbe9162d1a16ece7dfc45b (diff) |
perf_counter: Full task tracing
In order to be able to distinguish between no samples due to
inactivity and no samples due to the task having ended, Arjan asked for
PERF_EVENT_EXIT events. This is useful to the boot delay
instrumentation (bootchart) app.
This patch changes PERF_EVENT_FORK to be emitted on every
clone, and adds PERF_EVENT_EXIT to be emitted on task exit,
after the task's counters have been closed.
This task tracing is enabled when attr.comm or attr.mmap is set, or
through the new attr.task field.
Suggested-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ cleaned up perf_counter.h a bit ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/perf_counter.c | 87 |
2 files changed, 59 insertions, 32 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..466531eb92cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1269 | write_unlock_irq(&tasklist_lock); | 1269 | write_unlock_irq(&tasklist_lock); |
1270 | proc_fork_connector(p); | 1270 | proc_fork_connector(p); |
1271 | cgroup_post_fork(p); | 1271 | cgroup_post_fork(p); |
1272 | perf_counter_fork(p); | ||
1272 | return p; | 1273 | return p; |
1273 | 1274 | ||
1274 | bad_fork_free_pid: | 1275 | bad_fork_free_pid: |
@@ -1410,9 +1411,6 @@ long do_fork(unsigned long clone_flags, | |||
1410 | init_completion(&vfork); | 1411 | init_completion(&vfork); |
1411 | } | 1412 | } |
1412 | 1413 | ||
1413 | if (!(clone_flags & CLONE_THREAD)) | ||
1414 | perf_counter_fork(p); | ||
1415 | |||
1416 | audit_finish_fork(p); | 1414 | audit_finish_fork(p); |
1417 | tracehook_report_clone(regs, clone_flags, nr, p); | 1415 | tracehook_report_clone(regs, clone_flags, nr, p); |
1418 | 1416 | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 48471d75ae01..199ed4771315 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1; | |||
42 | static atomic_t nr_counters __read_mostly; | 42 | static atomic_t nr_counters __read_mostly; |
43 | static atomic_t nr_mmap_counters __read_mostly; | 43 | static atomic_t nr_mmap_counters __read_mostly; |
44 | static atomic_t nr_comm_counters __read_mostly; | 44 | static atomic_t nr_comm_counters __read_mostly; |
45 | static atomic_t nr_task_counters __read_mostly; | ||
45 | 46 | ||
46 | /* | 47 | /* |
47 | * perf counter paranoia level: | 48 | * perf counter paranoia level: |
@@ -1654,6 +1655,8 @@ static void free_counter(struct perf_counter *counter) | |||
1654 | atomic_dec(&nr_mmap_counters); | 1655 | atomic_dec(&nr_mmap_counters); |
1655 | if (counter->attr.comm) | 1656 | if (counter->attr.comm) |
1656 | atomic_dec(&nr_comm_counters); | 1657 | atomic_dec(&nr_comm_counters); |
1658 | if (counter->attr.task) | ||
1659 | atomic_dec(&nr_task_counters); | ||
1657 | } | 1660 | } |
1658 | 1661 | ||
1659 | if (counter->destroy) | 1662 | if (counter->destroy) |
@@ -2831,10 +2834,12 @@ perf_counter_read_event(struct perf_counter *counter, | |||
2831 | } | 2834 | } |
2832 | 2835 | ||
2833 | /* | 2836 | /* |
2834 | * fork tracking | 2837 | * task tracking -- fork/exit |
2838 | * | ||
2839 | * enabled by: attr.comm | attr.mmap | attr.task | ||
2835 | */ | 2840 | */ |
2836 | 2841 | ||
2837 | struct perf_fork_event { | 2842 | struct perf_task_event { |
2838 | struct task_struct *task; | 2843 | struct task_struct *task; |
2839 | 2844 | ||
2840 | struct { | 2845 | struct { |
@@ -2842,37 +2847,42 @@ struct perf_fork_event { | |||
2842 | 2847 | ||
2843 | u32 pid; | 2848 | u32 pid; |
2844 | u32 ppid; | 2849 | u32 ppid; |
2850 | u32 tid; | ||
2851 | u32 ptid; | ||
2845 | } event; | 2852 | } event; |
2846 | }; | 2853 | }; |
2847 | 2854 | ||
2848 | static void perf_counter_fork_output(struct perf_counter *counter, | 2855 | static void perf_counter_task_output(struct perf_counter *counter, |
2849 | struct perf_fork_event *fork_event) | 2856 | struct perf_task_event *task_event) |
2850 | { | 2857 | { |
2851 | struct perf_output_handle handle; | 2858 | struct perf_output_handle handle; |
2852 | int size = fork_event->event.header.size; | 2859 | int size = task_event->event.header.size; |
2853 | struct task_struct *task = fork_event->task; | 2860 | struct task_struct *task = task_event->task; |
2854 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | 2861 | int ret = perf_output_begin(&handle, counter, size, 0, 0); |
2855 | 2862 | ||
2856 | if (ret) | 2863 | if (ret) |
2857 | return; | 2864 | return; |
2858 | 2865 | ||
2859 | fork_event->event.pid = perf_counter_pid(counter, task); | 2866 | task_event->event.pid = perf_counter_pid(counter, task); |
2860 | fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); | 2867 | task_event->event.ppid = perf_counter_pid(counter, task->real_parent); |
2861 | 2868 | ||
2862 | perf_output_put(&handle, fork_event->event); | 2869 | task_event->event.tid = perf_counter_tid(counter, task); |
2870 | task_event->event.ptid = perf_counter_tid(counter, task->real_parent); | ||
2871 | |||
2872 | perf_output_put(&handle, task_event->event); | ||
2863 | perf_output_end(&handle); | 2873 | perf_output_end(&handle); |
2864 | } | 2874 | } |
2865 | 2875 | ||
2866 | static int perf_counter_fork_match(struct perf_counter *counter) | 2876 | static int perf_counter_task_match(struct perf_counter *counter) |
2867 | { | 2877 | { |
2868 | if (counter->attr.comm || counter->attr.mmap) | 2878 | if (counter->attr.comm || counter->attr.mmap || counter->attr.task) |
2869 | return 1; | 2879 | return 1; |
2870 | 2880 | ||
2871 | return 0; | 2881 | return 0; |
2872 | } | 2882 | } |
2873 | 2883 | ||
2874 | static void perf_counter_fork_ctx(struct perf_counter_context *ctx, | 2884 | static void perf_counter_task_ctx(struct perf_counter_context *ctx, |
2875 | struct perf_fork_event *fork_event) | 2885 | struct perf_task_event *task_event) |
2876 | { | 2886 | { |
2877 | struct perf_counter *counter; | 2887 | struct perf_counter *counter; |
2878 | 2888 | ||
@@ -2881,19 +2891,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx, | |||
2881 | 2891 | ||
2882 | rcu_read_lock(); | 2892 | rcu_read_lock(); |
2883 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | 2893 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { |
2884 | if (perf_counter_fork_match(counter)) | 2894 | if (perf_counter_task_match(counter)) |
2885 | perf_counter_fork_output(counter, fork_event); | 2895 | perf_counter_task_output(counter, task_event); |
2886 | } | 2896 | } |
2887 | rcu_read_unlock(); | 2897 | rcu_read_unlock(); |
2888 | } | 2898 | } |
2889 | 2899 | ||
2890 | static void perf_counter_fork_event(struct perf_fork_event *fork_event) | 2900 | static void perf_counter_task_event(struct perf_task_event *task_event) |
2891 | { | 2901 | { |
2892 | struct perf_cpu_context *cpuctx; | 2902 | struct perf_cpu_context *cpuctx; |
2893 | struct perf_counter_context *ctx; | 2903 | struct perf_counter_context *ctx; |
2894 | 2904 | ||
2895 | cpuctx = &get_cpu_var(perf_cpu_context); | 2905 | cpuctx = &get_cpu_var(perf_cpu_context); |
2896 | perf_counter_fork_ctx(&cpuctx->ctx, fork_event); | 2906 | perf_counter_task_ctx(&cpuctx->ctx, task_event); |
2897 | put_cpu_var(perf_cpu_context); | 2907 | put_cpu_var(perf_cpu_context); |
2898 | 2908 | ||
2899 | rcu_read_lock(); | 2909 | rcu_read_lock(); |
@@ -2903,32 +2913,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event) | |||
2903 | */ | 2913 | */ |
2904 | ctx = rcu_dereference(current->perf_counter_ctxp); | 2914 | ctx = rcu_dereference(current->perf_counter_ctxp); |
2905 | if (ctx) | 2915 | if (ctx) |
2906 | perf_counter_fork_ctx(ctx, fork_event); | 2916 | perf_counter_task_ctx(ctx, task_event); |
2907 | rcu_read_unlock(); | 2917 | rcu_read_unlock(); |
2908 | } | 2918 | } |
2909 | 2919 | ||
2910 | void perf_counter_fork(struct task_struct *task) | 2920 | static void perf_counter_task(struct task_struct *task, int new) |
2911 | { | 2921 | { |
2912 | struct perf_fork_event fork_event; | 2922 | struct perf_task_event task_event; |
2913 | 2923 | ||
2914 | if (!atomic_read(&nr_comm_counters) && | 2924 | if (!atomic_read(&nr_comm_counters) && |
2915 | !atomic_read(&nr_mmap_counters)) | 2925 | !atomic_read(&nr_mmap_counters) && |
2926 | !atomic_read(&nr_task_counters)) | ||
2916 | return; | 2927 | return; |
2917 | 2928 | ||
2918 | fork_event = (struct perf_fork_event){ | 2929 | task_event = (struct perf_task_event){ |
2919 | .task = task, | 2930 | .task = task, |
2920 | .event = { | 2931 | .event = { |
2921 | .header = { | 2932 | .header = { |
2922 | .type = PERF_EVENT_FORK, | 2933 | .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, |
2923 | .misc = 0, | 2934 | .misc = 0, |
2924 | .size = sizeof(fork_event.event), | 2935 | .size = sizeof(task_event.event), |
2925 | }, | 2936 | }, |
2926 | /* .pid */ | 2937 | /* .pid */ |
2927 | /* .ppid */ | 2938 | /* .ppid */ |
2939 | /* .tid */ | ||
2940 | /* .ptid */ | ||
2928 | }, | 2941 | }, |
2929 | }; | 2942 | }; |
2930 | 2943 | ||
2931 | perf_counter_fork_event(&fork_event); | 2944 | perf_counter_task_event(&task_event); |
2945 | } | ||
2946 | |||
2947 | void perf_counter_fork(struct task_struct *task) | ||
2948 | { | ||
2949 | perf_counter_task(task, 1); | ||
2932 | } | 2950 | } |
2933 | 2951 | ||
2934 | /* | 2952 | /* |
@@ -3887,6 +3905,8 @@ done: | |||
3887 | atomic_inc(&nr_mmap_counters); | 3905 | atomic_inc(&nr_mmap_counters); |
3888 | if (counter->attr.comm) | 3906 | if (counter->attr.comm) |
3889 | atomic_inc(&nr_comm_counters); | 3907 | atomic_inc(&nr_comm_counters); |
3908 | if (counter->attr.task) | ||
3909 | atomic_inc(&nr_task_counters); | ||
3890 | } | 3910 | } |
3891 | 3911 | ||
3892 | return counter; | 3912 | return counter; |
@@ -4248,8 +4268,10 @@ void perf_counter_exit_task(struct task_struct *child) | |||
4248 | struct perf_counter_context *child_ctx; | 4268 | struct perf_counter_context *child_ctx; |
4249 | unsigned long flags; | 4269 | unsigned long flags; |
4250 | 4270 | ||
4251 | if (likely(!child->perf_counter_ctxp)) | 4271 | if (likely(!child->perf_counter_ctxp)) { |
4272 | perf_counter_task(child, 0); | ||
4252 | return; | 4273 | return; |
4274 | } | ||
4253 | 4275 | ||
4254 | local_irq_save(flags); | 4276 | local_irq_save(flags); |
4255 | /* | 4277 | /* |
@@ -4267,15 +4289,22 @@ void perf_counter_exit_task(struct task_struct *child) | |||
4267 | * incremented the context's refcount before we do put_ctx below. | 4289 | * incremented the context's refcount before we do put_ctx below. |
4268 | */ | 4290 | */ |
4269 | spin_lock(&child_ctx->lock); | 4291 | spin_lock(&child_ctx->lock); |
4270 | child->perf_counter_ctxp = NULL; | ||
4271 | /* | 4292 | /* |
4272 | * If this context is a clone; unclone it so it can't get | 4293 | * If this context is a clone; unclone it so it can't get |
4273 | * swapped to another process while we're removing all | 4294 | * swapped to another process while we're removing all |
4274 | * the counters from it. | 4295 | * the counters from it. |
4275 | */ | 4296 | */ |
4276 | unclone_ctx(child_ctx); | 4297 | unclone_ctx(child_ctx); |
4277 | spin_unlock(&child_ctx->lock); | 4298 | spin_unlock_irqrestore(&child_ctx->lock, flags); |
4278 | local_irq_restore(flags); | 4299 | |
4300 | /* | ||
4301 | * Report the task dead after unscheduling the counters so that we | ||
4302 | * won't get any samples after PERF_EVENT_EXIT. We can however still | ||
4303 | * get a few PERF_EVENT_READ events. | ||
4304 | */ | ||
4305 | perf_counter_task(child, 0); | ||
4306 | |||
4307 | child->perf_counter_ctxp = NULL; | ||
4279 | 4308 | ||
4280 | /* | 4309 | /* |
4281 | * We can recurse on the same lock type through: | 4310 | * We can recurse on the same lock type through: |