author:    Song Liu <songliubraving@fb.com>        2018-05-07 13:50:48 -0400
committer: Daniel Borkmann <daniel@iogearbox.net>  2018-05-14 17:29:45 -0400
commit:    bae77c5eb5b2107e300fb02da2311f2aa0d8ee3c
tree:      54161e53098e6b684e7eb03bab0082626f0307fe  /kernel/bpf/stackmap.c
parent:    a84880ef4352c61896028448d809e32f8646628d
bpf: enable stackmap with build_id in nmi context
Currently, we cannot parse build_id in nmi context because of
up_read(&current->mm->mmap_sem); this makes stackmap with build_id
less useful. This patch enables parsing build_id in nmi context by
deferring the up_read() call to irq_work. To avoid memory allocation
in nmi context, we use a per-cpu variable for the irq_work. As a
result, only one irq_work per cpu is allowed. If the irq_work is in
use, we fall back to reporting only ips.
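For context, here is a minimal sketch (not part of this patch) of the
feature this enables: a BPF_MAP_TYPE_STACK_TRACE map created with
BPF_F_STACK_BUILD_ID, sampled from a perf_event program, which runs in
nmi context for hardware events. The map name, program name, and stack
depth constant are illustrative; the flags, helper, and struct come from
uapi/linux/bpf.h.

/* illustrative BPF-C sketch in the style of samples/bpf; names are
 * hypothetical, flags and helpers are from the uapi
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include "bpf_helpers.h"

#define MAX_STACK_DEPTH 127	/* same value as PERF_MAX_STACK_DEPTH */

struct bpf_map_def SEC("maps") stackmap = {
	.type = BPF_MAP_TYPE_STACK_TRACE,
	.key_size = sizeof(__u32),
	/* with BPF_F_STACK_BUILD_ID each stack slot is a
	 * struct bpf_stack_build_id instead of a raw ip
	 */
	.value_size = MAX_STACK_DEPTH * sizeof(struct bpf_stack_build_id),
	.max_entries = 128,
	.map_flags = BPF_F_STACK_BUILD_ID,
};

SEC("perf_event")
int sample_stack(struct bpf_perf_event_data *ctx)
{
	/* runs in nmi context for hardware events; before this patch
	 * build_id lookup always fell back to raw ips here
	 */
	bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	return 0;
}

char _license[] SEC("license") = "GPL";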
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel/bpf/stackmap.c')
-rw-r--r--  kernel/bpf/stackmap.c | 59
1 file changed, 53 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3ba102b41512..b59ace0f0f09 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,7 @@
 #include <linux/perf_event.h>
 #include <linux/elf.h>
 #include <linux/pagemap.h>
+#include <linux/irq_work.h>
 #include "percpu_freelist.h"
 
 #define STACK_CREATE_FLAG_MASK \
@@ -32,6 +33,23 @@ struct bpf_stack_map {
 	struct stack_map_bucket *buckets[];
 };
 
+/* irq_work to run up_read() for build_id lookup in nmi context */
+struct stack_map_irq_work {
+	struct irq_work irq_work;
+	struct rw_semaphore *sem;
+};
+
+static void do_up_read(struct irq_work *entry)
+{
+	struct stack_map_irq_work *work;
+
+	work = container_of(entry, struct stack_map_irq_work, irq_work);
+	up_read(work->sem);
+	work->sem = NULL;
+}
+
+static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
+
 static inline bool stack_map_use_build_id(struct bpf_map *map)
 {
 	return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 {
 	int i;
 	struct vm_area_struct *vma;
+	bool in_nmi_ctx = in_nmi();
+	bool irq_work_busy = false;
+	struct stack_map_irq_work *work;
+
+	if (in_nmi_ctx) {
+		work = this_cpu_ptr(&up_read_work);
+		if (work->irq_work.flags & IRQ_WORK_BUSY)
+			/* cannot queue more up_read, fallback */
+			irq_work_busy = true;
+	}
 
 	/*
-	 * We cannot do up_read() in nmi context, so build_id lookup is
-	 * only supported for non-nmi events. If at some point, it is
-	 * possible to run find_vma() without taking the semaphore, we
-	 * would like to allow build_id lookup in nmi context.
+	 * We cannot do up_read() in nmi context. To do build_id lookup
+	 * in nmi context, we need to run up_read() in irq_work. We use
+	 * a percpu variable to do the irq_work. If the irq_work is
+	 * already used by another lookup, we fall back to report ips.
 	 *
 	 * Same fallback is used for kernel stack (!user) on a stackmap
 	 * with build_id.
 	 */
-	if (!user || !current || !current->mm || in_nmi() ||
+	if (!user || !current || !current->mm || irq_work_busy ||
 	    down_read_trylock(&current->mm->mmap_sem) == 0) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 						- vma->vm_start;
 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
 	}
-	up_read(&current->mm->mmap_sem);
+
+	if (!in_nmi_ctx) {
+		up_read(&current->mm->mmap_sem);
+	} else {
+		work->sem = &current->mm->mmap_sem;
+		irq_work_queue(&work->irq_work);
+	}
 }
 
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 };
+
+static int __init stack_map_init(void)
+{
+	int cpu;
+	struct stack_map_irq_work *work;
+
+	for_each_possible_cpu(cpu) {
+		work = per_cpu_ptr(&up_read_work, cpu);
+		init_irq_work(&work->irq_work, do_up_read);
+	}
+	return 0;
+}
+subsys_initcall(stack_map_init);
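
As a hedged follow-up (not part of this patch), a user-space consumer of
such a map might look roughly like the sketch below. It relies only on
struct bpf_stack_build_id and its status values from uapi/linux/bpf.h
plus libbpf's bpf_map_lookup_elem(); the map fd, stack id, function name,
and include paths are assumptions. Entries that hit the nmi/irq_work-busy
fallback carry BPF_STACK_BUILD_ID_IP with a raw ip, while resolved
entries carry BPF_STACK_BUILD_ID_VALID with a build_id and file offset.

/* hypothetical user-space reader; map_fd and stack_id come from elsewhere */
#include <stdio.h>
#include <linux/bpf.h>	/* struct bpf_stack_build_id, status values */
#include <bpf/bpf.h>	/* libbpf bpf_map_lookup_elem(); path is an assumption */

#define MAX_STACK_DEPTH 127	/* must match the map's value_size */

static void print_stack(int map_fd, __u32 stack_id)
{
	struct bpf_stack_build_id entries[MAX_STACK_DEPTH] = {};
	int i, j;

	if (bpf_map_lookup_elem(map_fd, &stack_id, entries))
		return;

	for (i = 0; i < MAX_STACK_DEPTH; i++) {
		if (entries[i].status == BPF_STACK_BUILD_ID_EMPTY)
			break;
		if (entries[i].status == BPF_STACK_BUILD_ID_VALID) {
			/* build_id + file offset, resolvable offline */
			for (j = 0; j < BPF_BUILD_ID_SIZE; j++)
				printf("%02x", entries[i].build_id[j]);
			printf(" +0x%llx\n",
			       (unsigned long long)entries[i].offset);
		} else {
			/* BPF_STACK_BUILD_ID_IP: fallback, raw ip only */
			printf("ip 0x%llx\n",
			       (unsigned long long)entries[i].ip);
		}
	}
}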