author    Song Liu <songliubraving@fb.com>    2018-05-07 13:50:48 -0400
committer Daniel Borkmann <daniel@iogearbox.net>    2018-05-14 17:29:45 -0400
commit    bae77c5eb5b2107e300fb02da2311f2aa0d8ee3c
tree      54161e53098e6b684e7eb03bab0082626f0307fe /kernel/bpf/stackmap.c
parent    a84880ef4352c61896028448d809e32f8646628d
bpf: enable stackmap with build_id in nmi context
Currently, we cannot parse build_id in nmi context because of up_read(&current->mm->mmap_sem); this makes stackmap with build_id less useful. This patch enables parsing build_id in nmi context by moving the up_read() call into irq_work. To avoid memory allocation in nmi context, we use a per-cpu variable for the irq_work, so only one irq_work per cpu is allowed. If the irq_work is in use, we fall back to reporting only ips.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
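For context (not part of this patch): a minimal sketch of creating a build_id stackmap from user space via the raw bpf(2) syscall. The helper name, stack depth, and max_entries below are illustrative assumptions; only BPF_MAP_TYPE_STACK_TRACE, BPF_F_STACK_BUILD_ID, and struct bpf_stack_build_id come from the kernel UAPI.

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

#define MAX_STACK_DEPTH 127	/* illustrative; matches the default perf_event_max_stack */

/* Hypothetical helper: create a stack trace map whose entries are
 * <build_id, offset> pairs instead of raw instruction pointers. */
static int create_build_id_stackmap(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_STACK_TRACE;
	attr.key_size    = sizeof(__u32);
	/* with BPF_F_STACK_BUILD_ID, value_size must be a multiple of
	 * sizeof(struct bpf_stack_build_id) */
	attr.value_size  = sizeof(struct bpf_stack_build_id) * MAX_STACK_DEPTH;
	attr.max_entries = 10000;	/* illustrative */
	attr.map_flags   = BPF_F_STACK_BUILD_ID;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

With BPF_F_STACK_BUILD_ID, each stack entry is stored as a <build_id, file offset> pair, so user space can symbolize samples even after the traced process has exited.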
Diffstat (limited to 'kernel/bpf/stackmap.c')
 kernel/bpf/stackmap.c | 59
 1 file changed, 53 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3ba102b41512..b59ace0f0f09 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,7 @@
 #include <linux/perf_event.h>
 #include <linux/elf.h>
 #include <linux/pagemap.h>
+#include <linux/irq_work.h>
 #include "percpu_freelist.h"
 
 #define STACK_CREATE_FLAG_MASK \
@@ -32,6 +33,23 @@ struct bpf_stack_map {
 	struct stack_map_bucket *buckets[];
 };
 
+/* irq_work to run up_read() for build_id lookup in nmi context */
+struct stack_map_irq_work {
+	struct irq_work irq_work;
+	struct rw_semaphore *sem;
+};
+
+static void do_up_read(struct irq_work *entry)
+{
+	struct stack_map_irq_work *work;
+
+	work = container_of(entry, struct stack_map_irq_work, irq_work);
+	up_read(work->sem);
+	work->sem = NULL;
+}
+
+static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
+
 static inline bool stack_map_use_build_id(struct bpf_map *map)
 {
 	return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 {
 	int i;
 	struct vm_area_struct *vma;
+	bool in_nmi_ctx = in_nmi();
+	bool irq_work_busy = false;
+	struct stack_map_irq_work *work;
+
+	if (in_nmi_ctx) {
+		work = this_cpu_ptr(&up_read_work);
+		if (work->irq_work.flags & IRQ_WORK_BUSY)
+			/* cannot queue more up_read, fallback */
+			irq_work_busy = true;
+	}
 
 	/*
-	 * We cannot do up_read() in nmi context, so build_id lookup is
-	 * only supported for non-nmi events. If at some point, it is
-	 * possible to run find_vma() without taking the semaphore, we
-	 * would like to allow build_id lookup in nmi context.
+	 * We cannot do up_read() in nmi context. To do build_id lookup
+	 * in nmi context, we need to run up_read() in irq_work. We use
+	 * a percpu variable to do the irq_work. If the irq_work is
+	 * already used by another lookup, we fall back to report ips.
 	 *
 	 * Same fallback is used for kernel stack (!user) on a stackmap
 	 * with build_id.
 	 */
-	if (!user || !current || !current->mm || in_nmi() ||
+	if (!user || !current || !current->mm || irq_work_busy ||
 	    down_read_trylock(&current->mm->mmap_sem) == 0) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			- vma->vm_start;
 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
 	}
-	up_read(&current->mm->mmap_sem);
+
+	if (!in_nmi_ctx) {
+		up_read(&current->mm->mmap_sem);
+	} else {
+		work->sem = &current->mm->mmap_sem;
+		irq_work_queue(&work->irq_work);
+	}
 }
 
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 };
+
+static int __init stack_map_init(void)
+{
+	int cpu;
+	struct stack_map_irq_work *work;
+
+	for_each_possible_cpu(cpu) {
+		work = per_cpu_ptr(&up_read_work, cpu);
+		init_irq_work(&work->irq_work, do_up_read);
+	}
+	return 0;
+}
+subsys_initcall(stack_map_init);
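Usage note (a sketch, not part of this patch): BPF programs attached to hardware perf events typically run in NMI context, which is exactly where the build_id lookup used to fall back unconditionally. A minimal BPF-side sketch, assuming modern libbpf conventions (BTF-style map definition and bpf_helpers.h, which postdate this patch); map sizes and names are illustrative.

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf_helpers.h>

/* Stack trace map storing <build_id, offset> pairs. */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct bpf_stack_build_id) * 127);
	__uint(max_entries, 10000);
	__uint(map_flags, BPF_F_STACK_BUILD_ID);
} stackmap SEC(".maps");

SEC("perf_event")
int on_sample(struct bpf_perf_event_data *ctx)
{
	/* Runs in NMI context for hardware counters. Before this patch
	 * the lookup always fell back to raw ips here; after it, the
	 * lookup succeeds unless the per-cpu irq_work is still busy. */
	bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";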