aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2016-04-21 11:28:50 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-04-27 09:20:39 -0400
commitc5dfd78eb79851e278b7973031b9ca363da87a7e (patch)
treeeb48703a86c059b4de2a13e4c7021232c22e3715 /kernel
parentc2a218c63ba36946aca5943c0c8ebd3a42e3dc4b (diff)
perf core: Allow setting up max frame stack depth via sysctl
The default remains 127, which is good for most cases, and not even hit most of the time, but then for some cases, as reported by Brendan, 1024+ deep frames are appearing on the radar for things like groovy, ruby. And in some workloads putting a _lower_ cap on this may make sense. One that is per event still needs to be put in place though. The new file is: # cat /proc/sys/kernel/perf_event_max_stack 127 Changing it: # echo 256 > /proc/sys/kernel/perf_event_max_stack # cat /proc/sys/kernel/perf_event_max_stack 256 But as soon as there is some event using callchains we get: # echo 512 > /proc/sys/kernel/perf_event_max_stack -bash: echo: write error: Device or resource busy # Because we only allocate the callchain percpu data structures when there is a user, which allows for changing the max easily, it's just a matter of having no callchain users at that point. Reported-and-Tested-by: Brendan Gregg <brendan.d.gregg@gmail.com> Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Acked-by: David Ahern <dsahern@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Milian Wolff <milian.wolff@kdab.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: Wang Nan <wangnan0@huawei.com> Cc: Zefan Li <lizefan@huawei.com> Link: http://lkml.kernel.org/r/20160426002928.GB16708@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/stackmap.c8
-rw-r--r--kernel/events/callchain.c35
-rw-r--r--kernel/sysctl.c12
3 files changed, 49 insertions, 6 deletions
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 499d9e933f8e..f5a19548be12 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
66 /* check sanity of attributes */ 66 /* check sanity of attributes */
67 if (attr->max_entries == 0 || attr->key_size != 4 || 67 if (attr->max_entries == 0 || attr->key_size != 4 ||
68 value_size < 8 || value_size % 8 || 68 value_size < 8 || value_size % 8 ||
69 value_size / 8 > PERF_MAX_STACK_DEPTH) 69 value_size / 8 > sysctl_perf_event_max_stack)
70 return ERR_PTR(-EINVAL); 70 return ERR_PTR(-EINVAL);
71 71
72 /* hash table size must be power of 2 */ 72 /* hash table size must be power of 2 */
@@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
124 struct perf_callchain_entry *trace; 124 struct perf_callchain_entry *trace;
125 struct stack_map_bucket *bucket, *new_bucket, *old_bucket; 125 struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
126 u32 max_depth = map->value_size / 8; 126 u32 max_depth = map->value_size / 8;
127 /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */ 127 /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
128 u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth; 128 u32 init_nr = sysctl_perf_event_max_stack - max_depth;
129 u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 129 u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
130 u32 hash, id, trace_nr, trace_len; 130 u32 hash, id, trace_nr, trace_len;
131 bool user = flags & BPF_F_USER_STACK; 131 bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
143 return -EFAULT; 143 return -EFAULT;
144 144
145 /* get_perf_callchain() guarantees that trace->nr >= init_nr 145 /* get_perf_callchain() guarantees that trace->nr >= init_nr
146 * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth 146 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
147 */ 147 */
148 trace_nr = trace->nr - init_nr; 148 trace_nr = trace->nr - init_nr;
149 149
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 343c22f5e867..b9325e7dcba1 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
18 struct perf_callchain_entry *cpu_entries[0]; 18 struct perf_callchain_entry *cpu_entries[0];
19}; 19};
20 20
21int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
22
23static inline size_t perf_callchain_entry__sizeof(void)
24{
25 return (sizeof(struct perf_callchain_entry) +
26 sizeof(__u64) * sysctl_perf_event_max_stack);
27}
28
21static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); 29static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
22static atomic_t nr_callchain_events; 30static atomic_t nr_callchain_events;
23static DEFINE_MUTEX(callchain_mutex); 31static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
73 if (!entries) 81 if (!entries)
74 return -ENOMEM; 82 return -ENOMEM;
75 83
76 size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; 84 size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
77 85
78 for_each_possible_cpu(cpu) { 86 for_each_possible_cpu(cpu) {
79 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, 87 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
147 155
148 cpu = smp_processor_id(); 156 cpu = smp_processor_id();
149 157
150 return &entries->cpu_entries[cpu][*rctx]; 158 return (((void *)entries->cpu_entries[cpu]) +
159 (*rctx * perf_callchain_entry__sizeof()));
151} 160}
152 161
153static void 162static void
@@ -215,3 +224,25 @@ exit_put:
215 224
216 return entry; 225 return entry;
217} 226}
227
228int perf_event_max_stack_handler(struct ctl_table *table, int write,
229 void __user *buffer, size_t *lenp, loff_t *ppos)
230{
231 int new_value = sysctl_perf_event_max_stack, ret;
232 struct ctl_table new_table = *table;
233
234 new_table.data = &new_value;
235 ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
236 if (ret || !write)
237 return ret;
238
239 mutex_lock(&callchain_mutex);
240 if (atomic_read(&nr_callchain_events))
241 ret = -EBUSY;
242 else
243 sysctl_perf_event_max_stack = new_value;
244
245 mutex_unlock(&callchain_mutex);
246
247 return ret;
248}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 725587f10667..c8b318663525 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,9 @@ static int one_thousand = 1000;
130#ifdef CONFIG_PRINTK 130#ifdef CONFIG_PRINTK
131static int ten_thousand = 10000; 131static int ten_thousand = 10000;
132#endif 132#endif
133#ifdef CONFIG_PERF_EVENTS
134static int six_hundred_forty_kb = 640 * 1024;
135#endif
133 136
134/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ 137/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
135static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; 138static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
1144 .extra1 = &zero, 1147 .extra1 = &zero,
1145 .extra2 = &one_hundred, 1148 .extra2 = &one_hundred,
1146 }, 1149 },
1150 {
1151 .procname = "perf_event_max_stack",
1152 .data = NULL, /* filled in by handler */
1153 .maxlen = sizeof(sysctl_perf_event_max_stack),
1154 .mode = 0644,
1155 .proc_handler = perf_event_max_stack_handler,
1156 .extra1 = &zero,
1157 .extra2 = &six_hundred_forty_kb,
1158 },
1147#endif 1159#endif
1148#ifdef CONFIG_KMEMCHECK 1160#ifdef CONFIG_KMEMCHECK
1149 { 1161 {