diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2016-04-28 12:16:33 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2016-05-30 11:41:44 -0400 |
commit | 97c79a38cd454602645f0470ffb444b3b75ce574 (patch) | |
tree | 40e427e9c76c5e4e1a5f2da1f3ec81e34223fa9b | |
parent | 480ca357fd7f86a381a5b35a8157aa176eddbed4 (diff) |
perf core: Per event callchain limit
In addition to being able to control the system-wide maximum depth via
/proc/sys/kernel/perf_event_max_stack, we are now able to ask for
different depths per event, using perf_event_attr.sample_max_stack for
that.
This uses a u16 hole at the end of perf_event_attr. When
perf_event_attr.sample_type has PERF_SAMPLE_CALLCHAIN set, a
sample_max_stack of zero means use perf_event_max_stack; otherwise
it'll be bounds checked under callchain_mutex.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | include/linux/perf_event.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 6 | ||||
-rw-r--r-- | kernel/bpf/stackmap.c | 2 | ||||
-rw-r--r-- | kernel/events/callchain.c | 14 | ||||
-rw-r--r-- | kernel/events/core.c | 5 |
5 files changed, 23 insertions, 6 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6b87be908790..0e43355c7aad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct | |||
1076 | extern struct perf_callchain_entry * | 1076 | extern struct perf_callchain_entry * |
1077 | get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, | 1077 | get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, |
1078 | u32 max_stack, bool crosstask, bool add_mark); | 1078 | u32 max_stack, bool crosstask, bool add_mark); |
1079 | extern int get_callchain_buffers(void); | 1079 | extern int get_callchain_buffers(int max_stack); |
1080 | extern void put_callchain_buffers(void); | 1080 | extern void put_callchain_buffers(void); |
1081 | 1081 | ||
1082 | extern int sysctl_perf_event_max_stack; | 1082 | extern int sysctl_perf_event_max_stack; |
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 36ce552cf6a9..c66a485a24ac 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h | |||
@@ -276,6 +276,9 @@ enum perf_event_read_format { | |||
276 | 276 | ||
277 | /* | 277 | /* |
278 | * Hardware event_id to monitor via a performance monitoring event: | 278 | * Hardware event_id to monitor via a performance monitoring event: |
279 | * | ||
280 | * @sample_max_stack: Max number of frame pointers in a callchain, | ||
281 | * should be < /proc/sys/kernel/perf_event_max_stack | ||
279 | */ | 282 | */ |
280 | struct perf_event_attr { | 283 | struct perf_event_attr { |
281 | 284 | ||
@@ -385,7 +388,8 @@ struct perf_event_attr { | |||
385 | * Wakeup watermark for AUX area | 388 | * Wakeup watermark for AUX area |
386 | */ | 389 | */ |
387 | __u32 aux_watermark; | 390 | __u32 aux_watermark; |
388 | __u32 __reserved_2; /* align to __u64 */ | 391 | __u16 sample_max_stack; |
392 | __u16 __reserved_2; /* align to __u64 */ | ||
389 | }; | 393 | }; |
390 | 394 | ||
391 | #define perf_flags(attr) (*(&(attr)->read_format + 1)) | 395 | #define perf_flags(attr) (*(&(attr)->read_format + 1)) |
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index a82d7605db3f..f1de5c1a2af6 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c | |||
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) | |||
99 | if (err) | 99 | if (err) |
100 | goto free_smap; | 100 | goto free_smap; |
101 | 101 | ||
102 | err = get_callchain_buffers(); | 102 | err = get_callchain_buffers(sysctl_perf_event_max_stack); |
103 | if (err) | 103 | if (err) |
104 | goto free_smap; | 104 | goto free_smap; |
105 | 105 | ||
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 179ef4640964..e9fdb5203de5 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c | |||
@@ -104,7 +104,7 @@ fail: | |||
104 | return -ENOMEM; | 104 | return -ENOMEM; |
105 | } | 105 | } |
106 | 106 | ||
107 | int get_callchain_buffers(void) | 107 | int get_callchain_buffers(int event_max_stack) |
108 | { | 108 | { |
109 | int err = 0; | 109 | int err = 0; |
110 | int count; | 110 | int count; |
@@ -121,6 +121,15 @@ int get_callchain_buffers(void) | |||
121 | /* If the allocation failed, give up */ | 121 | /* If the allocation failed, give up */ |
122 | if (!callchain_cpus_entries) | 122 | if (!callchain_cpus_entries) |
123 | err = -ENOMEM; | 123 | err = -ENOMEM; |
124 | /* | ||
125 | * If requesting per event more than the global cap, | ||
126 | * return a different error to help userspace figure | ||
127 | * this out. | ||
128 | * | ||
129 | * And also do it here so that we have &callchain_mutex held. | ||
130 | */ | ||
131 | if (event_max_stack > sysctl_perf_event_max_stack) | ||
132 | err = -EOVERFLOW; | ||
124 | goto exit; | 133 | goto exit; |
125 | } | 134 | } |
126 | 135 | ||
@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) | |||
174 | bool user = !event->attr.exclude_callchain_user; | 183 | bool user = !event->attr.exclude_callchain_user; |
175 | /* Disallow cross-task user callchains. */ | 184 | /* Disallow cross-task user callchains. */ |
176 | bool crosstask = event->ctx->task && event->ctx->task != current; | 185 | bool crosstask = event->ctx->task && event->ctx->task != current; |
186 | const u32 max_stack = event->attr.sample_max_stack; | ||
177 | 187 | ||
178 | if (!kernel && !user) | 188 | if (!kernel && !user) |
179 | return NULL; | 189 | return NULL; |
180 | 190 | ||
181 | return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true); | 191 | return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); |
182 | } | 192 | } |
183 | 193 | ||
184 | struct perf_callchain_entry * | 194 | struct perf_callchain_entry * |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 050a290c72c7..79363f298445 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
8843 | 8843 | ||
8844 | if (!event->parent) { | 8844 | if (!event->parent) { |
8845 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { | 8845 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { |
8846 | err = get_callchain_buffers(); | 8846 | err = get_callchain_buffers(attr->sample_max_stack); |
8847 | if (err) | 8847 | if (err) |
8848 | goto err_addr_filters; | 8848 | goto err_addr_filters; |
8849 | } | 8849 | } |
@@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open, | |||
9165 | return -EINVAL; | 9165 | return -EINVAL; |
9166 | } | 9166 | } |
9167 | 9167 | ||
9168 | if (!attr.sample_max_stack) | ||
9169 | attr.sample_max_stack = sysctl_perf_event_max_stack; | ||
9170 | |||
9168 | /* | 9171 | /* |
9169 | * In cgroup mode, the pid argument is used to pass the fd | 9172 | * In cgroup mode, the pid argument is used to pass the fd |
9170 | * opened to the cgroup directory in cgroupfs. The cpu argument | 9173 | * opened to the cgroup directory in cgroupfs. The cpu argument |