aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arnaldo Carvalho de Melo <acme@redhat.com> 2016-04-28 12:16:33 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2016-05-30 11:41:44 -0400
commit 97c79a38cd454602645f0470ffb444b3b75ce574 (patch)
tree 40e427e9c76c5e4e1a5f2da1f3ec81e34223fa9b
parent 480ca357fd7f86a381a5b35a8157aa176eddbed4 (diff)
perf core: Per event callchain limit
In addition to controlling the system-wide maximum callchain depth via /proc/sys/kernel/perf_event_max_stack, we can now request a different depth per event, using perf_event_attr.sample_max_stack.

This uses a u16 hole at the end of perf_event_attr. When perf_event_attr.sample_type has PERF_SAMPLE_CALLCHAIN set, a sample_max_stack of zero means use perf_event_max_stack; otherwise the value is bounds checked under callchain_mutex.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- include/linux/perf_event.h 2
-rw-r--r-- include/uapi/linux/perf_event.h 6
-rw-r--r-- kernel/bpf/stackmap.c 2
-rw-r--r-- kernel/events/callchain.c 14
-rw-r--r-- kernel/events/core.c 5
5 files changed, 23 insertions, 6 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6b87be908790..0e43355c7aad 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
1076extern struct perf_callchain_entry * 1076extern struct perf_callchain_entry *
1077get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, 1077get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
1078 u32 max_stack, bool crosstask, bool add_mark); 1078 u32 max_stack, bool crosstask, bool add_mark);
1079extern int get_callchain_buffers(void); 1079extern int get_callchain_buffers(int max_stack);
1080extern void put_callchain_buffers(void); 1080extern void put_callchain_buffers(void);
1081 1081
1082extern int sysctl_perf_event_max_stack; 1082extern int sysctl_perf_event_max_stack;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 36ce552cf6a9..c66a485a24ac 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -276,6 +276,9 @@ enum perf_event_read_format {
276 276
277/* 277/*
278 * Hardware event_id to monitor via a performance monitoring event: 278 * Hardware event_id to monitor via a performance monitoring event:
279 *
280 * @sample_max_stack: Max number of frame pointers in a callchain,
281 * should be <= /proc/sys/kernel/perf_event_max_stack
279 */ 282 */
280struct perf_event_attr { 283struct perf_event_attr {
281 284
@@ -385,7 +388,8 @@ struct perf_event_attr {
385 * Wakeup watermark for AUX area 388 * Wakeup watermark for AUX area
386 */ 389 */
387 __u32 aux_watermark; 390 __u32 aux_watermark;
388 __u32 __reserved_2; /* align to __u64 */ 391 __u16 sample_max_stack;
392 __u16 __reserved_2; /* align to __u64 */
389}; 393};
390 394
391#define perf_flags(attr) (*(&(attr)->read_format + 1)) 395#define perf_flags(attr) (*(&(attr)->read_format + 1))
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a82d7605db3f..f1de5c1a2af6 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
99 if (err) 99 if (err)
100 goto free_smap; 100 goto free_smap;
101 101
102 err = get_callchain_buffers(); 102 err = get_callchain_buffers(sysctl_perf_event_max_stack);
103 if (err) 103 if (err)
104 goto free_smap; 104 goto free_smap;
105 105
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 179ef4640964..e9fdb5203de5 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -104,7 +104,7 @@ fail:
104 return -ENOMEM; 104 return -ENOMEM;
105} 105}
106 106
107int get_callchain_buffers(void) 107int get_callchain_buffers(int event_max_stack)
108{ 108{
109 int err = 0; 109 int err = 0;
110 int count; 110 int count;
@@ -121,6 +121,15 @@ int get_callchain_buffers(void)
121 /* If the allocation failed, give up */ 121 /* If the allocation failed, give up */
122 if (!callchain_cpus_entries) 122 if (!callchain_cpus_entries)
123 err = -ENOMEM; 123 err = -ENOMEM;
124 /*
125 * If requesting per event more than the global cap,
126 * return a different error to help userspace figure
127 * this out.
128 *
129 * And also do it here so that we have &callchain_mutex held.
130 */
131 if (event_max_stack > sysctl_perf_event_max_stack)
132 err = -EOVERFLOW;
124 goto exit; 133 goto exit;
125 } 134 }
126 135
@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
174 bool user = !event->attr.exclude_callchain_user; 183 bool user = !event->attr.exclude_callchain_user;
175 /* Disallow cross-task user callchains. */ 184 /* Disallow cross-task user callchains. */
176 bool crosstask = event->ctx->task && event->ctx->task != current; 185 bool crosstask = event->ctx->task && event->ctx->task != current;
186 const u32 max_stack = event->attr.sample_max_stack;
177 187
178 if (!kernel && !user) 188 if (!kernel && !user)
179 return NULL; 189 return NULL;
180 190
181 return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true); 191 return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
182} 192}
183 193
184struct perf_callchain_entry * 194struct perf_callchain_entry *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 050a290c72c7..79363f298445 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
8843 8843
8844 if (!event->parent) { 8844 if (!event->parent) {
8845 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { 8845 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
8846 err = get_callchain_buffers(); 8846 err = get_callchain_buffers(attr->sample_max_stack);
8847 if (err) 8847 if (err)
8848 goto err_addr_filters; 8848 goto err_addr_filters;
8849 } 8849 }
@@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open,
9165 return -EINVAL; 9165 return -EINVAL;
9166 } 9166 }
9167 9167
9168 if (!attr.sample_max_stack)
9169 attr.sample_max_stack = sysctl_perf_event_max_stack;
9170
9168 /* 9171 /*
9169 * In cgroup mode, the pid argument is used to pass the fd 9172 * In cgroup mode, the pid argument is used to pass the fd
9170 * opened to the cgroup directory in cgroupfs. The cpu argument 9173 * opened to the cgroup directory in cgroupfs. The cpu argument