aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/kallsyms.c
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2017-02-16 16:24:50 -0500
committer David S. Miller <davem@davemloft.net> 2017-02-17 13:40:05 -0500
commit 74451e66d516c55e309e8d89a4a1e7596e46aacd (patch)
tree 62d6243fab97ce01fee6863bae0f36e87efcb92b /kernel/kallsyms.c
parent 9383191da4e40360a5d880fbe6bb03911c61621b (diff)
bpf: make jited programs visible in traces
A long-standing issue with JITed programs is that stack traces from function tracing check whether a given address is kernel code through {__,}kernel_text_address(), which checks for code in core kernel, modules and dynamically allocated ftrace trampolines. But what is still missing is BPF JITed programs (interpreted programs are not an issue as __bpf_prog_run() will be attributed to them), thus when a stack trace is triggered, the code walking the stack won't see any of the JITed ones. The same holds for address correlation done from user space via reading /proc/kallsyms. This is read by tools like perf, but the latter is also useful for permanent live tracing with eBPF itself in combination with stack maps when other eBPF types are part of the callchain. See the offwaketime example on dumping a stack from a map. This work tries to tackle that issue by making the addresses and symbols known to the kernel. The lookup from *kernel_text_address() is implemented through a latched RB tree that can be read under RCU in the fast-path, which is also shared for symbol/size/offset lookup for a specific given address in kallsyms. The slow-path iteration through all symbols in the seq file is done via an RCU list, which holds a tiny fraction of all exported ksyms, usually below 0.1 percent. Function symbols are exported as bpf_prog_<tag>, in order to aid debugging and attribution. This facility is currently enabled for root-only when bpf_jit_kallsyms is set to 1, and disabled if hardening is active in any mode. The rationale behind this is that a lot of systems still ship with world-read permissions on kallsyms, thus addresses should not get suddenly exposed for them. If that situation gets much better in the future, we always have the option to change the default on this. Likewise, unprivileged programs are not allowed to add entries there either, but that is less of a concern as most such program types relevant in this context are for root-only anyway. 
If enabled, call graphs and stack traces will then show a correct attribution; one example is illustrated below, where the trace is now visible in tooling such as perf script --kallsyms=/proc/kallsyms and friends. Before: 7fff8166889d bpf_clone_redirect+0x80007f0020ed (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff006451f1a007 (/usr/lib64/libc-2.18.so) After: 7fff816688b7 bpf_clone_redirect+0x80007f002107 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa0575728 bpf_prog_33c45a467c9e061a+0x8000600020fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa07ef1fc cls_bpf_classify+0x8000600020dc (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81678b68 tc_classify+0x80007f002078 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d40b __netif_receive_skb_core+0x80007f0025fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d718 __netif_receive_skb+0x80007f002018 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164e565 process_backlog+0x80007f002095 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164dc71 net_rx_action+0x80007f002231 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81767461 __softirqentry_text_start+0x80007f0020d1 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817658ac do_softirq_own_stack+0x80007f00201c (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2c20 do_softirq+0x80007f002050 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2cb5 __local_bh_enable_ip+0x80007f002085 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168d452 ip_finish_output2+0x80007f002152 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168ea3d ip_finish_output+0x80007f00217d (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168f2af ip_output+0x80007f00203f (/lib/modules/4.9.0-rc8+/build/vmlinux) [...] 
7fff81005854 do_syscall_64+0x80007f002054 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817649eb return_from_SYSCALL_64+0x80007f002000 (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff01c484812007 (/usr/lib64/libc-2.18.so) Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/kallsyms.c')
-rw-r--r-- kernel/kallsyms.c 61
1 files changed, 49 insertions, 12 deletions
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fafd1a3ef0da..6a3b249a2ae1 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -23,6 +23,7 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/ctype.h> 24#include <linux/ctype.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/filter.h>
26#include <linux/compiler.h> 27#include <linux/compiler.h>
27 28
28#include <asm/sections.h> 29#include <asm/sections.h>
@@ -300,10 +301,11 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
300 unsigned long *offset) 301 unsigned long *offset)
301{ 302{
302 char namebuf[KSYM_NAME_LEN]; 303 char namebuf[KSYM_NAME_LEN];
304
303 if (is_ksym_addr(addr)) 305 if (is_ksym_addr(addr))
304 return !!get_symbol_pos(addr, symbolsize, offset); 306 return !!get_symbol_pos(addr, symbolsize, offset);
305 307 return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
306 return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf); 308 !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
307} 309}
308 310
309/* 311/*
@@ -318,6 +320,8 @@ const char *kallsyms_lookup(unsigned long addr,
318 unsigned long *offset, 320 unsigned long *offset,
319 char **modname, char *namebuf) 321 char **modname, char *namebuf)
320{ 322{
323 const char *ret;
324
321 namebuf[KSYM_NAME_LEN - 1] = 0; 325 namebuf[KSYM_NAME_LEN - 1] = 0;
322 namebuf[0] = 0; 326 namebuf[0] = 0;
323 327
@@ -333,9 +337,13 @@ const char *kallsyms_lookup(unsigned long addr,
333 return namebuf; 337 return namebuf;
334 } 338 }
335 339
336 /* See if it's in a module. */ 340 /* See if it's in a module or a BPF JITed image. */
337 return module_address_lookup(addr, symbolsize, offset, modname, 341 ret = module_address_lookup(addr, symbolsize, offset,
338 namebuf); 342 modname, namebuf);
343 if (!ret)
344 ret = bpf_address_lookup(addr, symbolsize,
345 offset, modname, namebuf);
346 return ret;
339} 347}
340 348
341int lookup_symbol_name(unsigned long addr, char *symname) 349int lookup_symbol_name(unsigned long addr, char *symname)
@@ -471,6 +479,7 @@ EXPORT_SYMBOL(__print_symbol);
471/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ 479/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
472struct kallsym_iter { 480struct kallsym_iter {
473 loff_t pos; 481 loff_t pos;
482 loff_t pos_mod_end;
474 unsigned long value; 483 unsigned long value;
475 unsigned int nameoff; /* If iterating in core kernel symbols. */ 484 unsigned int nameoff; /* If iterating in core kernel symbols. */
476 char type; 485 char type;
@@ -481,13 +490,27 @@ struct kallsym_iter {
481 490
482static int get_ksymbol_mod(struct kallsym_iter *iter) 491static int get_ksymbol_mod(struct kallsym_iter *iter)
483{ 492{
484 if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, 493 int ret = module_get_kallsym(iter->pos - kallsyms_num_syms,
485 &iter->type, iter->name, iter->module_name, 494 &iter->value, &iter->type,
486 &iter->exported) < 0) 495 iter->name, iter->module_name,
496 &iter->exported);
497 if (ret < 0) {
498 iter->pos_mod_end = iter->pos;
487 return 0; 499 return 0;
500 }
501
488 return 1; 502 return 1;
489} 503}
490 504
505static int get_ksymbol_bpf(struct kallsym_iter *iter)
506{
507 iter->module_name[0] = '\0';
508 iter->exported = 0;
509 return bpf_get_kallsym(iter->pos - iter->pos_mod_end,
510 &iter->value, &iter->type,
511 iter->name) < 0 ? 0 : 1;
512}
513
491/* Returns space to next name. */ 514/* Returns space to next name. */
492static unsigned long get_ksymbol_core(struct kallsym_iter *iter) 515static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
493{ 516{
@@ -508,16 +531,30 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
508 iter->name[0] = '\0'; 531 iter->name[0] = '\0';
509 iter->nameoff = get_symbol_offset(new_pos); 532 iter->nameoff = get_symbol_offset(new_pos);
510 iter->pos = new_pos; 533 iter->pos = new_pos;
534 if (new_pos == 0)
535 iter->pos_mod_end = 0;
536}
537
538static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
539{
540 iter->pos = pos;
541
542 if (iter->pos_mod_end > 0 &&
543 iter->pos_mod_end < iter->pos)
544 return get_ksymbol_bpf(iter);
545
546 if (!get_ksymbol_mod(iter))
547 return get_ksymbol_bpf(iter);
548
549 return 1;
511} 550}
512 551
513/* Returns false if pos at or past end of file. */ 552/* Returns false if pos at or past end of file. */
514static int update_iter(struct kallsym_iter *iter, loff_t pos) 553static int update_iter(struct kallsym_iter *iter, loff_t pos)
515{ 554{
516 /* Module symbols can be accessed randomly. */ 555 /* Module symbols can be accessed randomly. */
517 if (pos >= kallsyms_num_syms) { 556 if (pos >= kallsyms_num_syms)
518 iter->pos = pos; 557 return update_iter_mod(iter, pos);
519 return get_ksymbol_mod(iter);
520 }
521 558
522 /* If we're not on the desired position, reset to new position. */ 559 /* If we're not on the desired position, reset to new position. */
523 if (pos != iter->pos) 560 if (pos != iter->pos)