aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2019-04-09 17:20:03 -0400
committerAlexei Starovoitov <ast@kernel.org>2019-04-09 20:05:46 -0400
commitd8eca5bbb2be9bc7546f9e733786fa2f1a594c67 (patch)
tree2849428915f4a9604fe11b4c9422627d6b127716 /kernel/bpf/syscall.c
parentff466b58055f2d28d8ddc1388af312e87a693efe (diff)
bpf: implement lookup-free direct value access for maps
This generic extension to BPF maps allows for directly loading an address residing inside a BPF map value as a single BPF ldimm64 instruction! The idea is similar to what BPF_PSEUDO_MAP_FD does today, which is a special src_reg flag for the ldimm64 instruction that indicates that inside the first part of the double insns's imm field is a file descriptor which the verifier then replaces as a full 64bit address of the map into both imm parts. For the newly added BPF_PSEUDO_MAP_VALUE src_reg flag, the idea is the following: the first part of the double insns's imm field is again a file descriptor corresponding to the map, and the second part of the imm field is an offset into the value. The verifier will then replace both imm parts with an address that points into the BPF map value at the given value offset for maps that support this operation. Currently supported is array map with single entry. It is possible to support more than just a single map element by reusing both 16bit off fields of the insns as a map index, so a full array map lookup could be expressed that way. It hasn't been implemented here due to lack of a concrete use case, but could easily be done so in future in a compatible way, since both off fields right now have to be 0 and would correctly denote a map index 0. The BPF_PSEUDO_MAP_VALUE is a distinct flag as otherwise with BPF_PSEUDO_MAP_FD we could not distinguish offset 0 between a load of the map pointer versus a load of the map's value at offset 0, and changing BPF_PSEUDO_MAP_FD's encoding into off by one to differentiate between a regular map pointer and a map value pointer would add unnecessary complexity and increase the barrier for debuggability, and is thus less suitable. Using the second part of the imm field as an offset into the value does /not/ come with limitations since the maximum possible value size is in the u32 universe anyway. 
This optimization allows for efficiently retrieving an address to a map value memory area without having to issue a helper call which needs to prepare registers according to the calling convention, etc., without needing the extra NULL test, and without having to add the offset in an additional instruction to the value base pointer. The verifier then treats the destination register as PTR_TO_MAP_VALUE with constant reg->off from the user-passed offset from the second imm field, and guarantees that this is within bounds of the map value. Any subsequent operations are normally treated as typical map value handling without anything extra needed from the verification side. The two map operations for direct value access have been added to the array map for now. In the future other types could be supported as well depending on the use case. The main use case for this commit is to allow for BPF loader support for global variables that reside in .data/.rodata/.bss sections such that we can directly load the address of them with minimal additional infrastructure required. Loader support has been added in subsequent commits for the libbpf library. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c28
1 file changed, 21 insertions, 7 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1d65e56594db..828518bb947b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2072,13 +2072,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
2072} 2072}
2073 2073
2074static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 2074static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
2075 unsigned long addr) 2075 unsigned long addr, u32 *off,
2076 u32 *type)
2076{ 2077{
2078 const struct bpf_map *map;
2077 int i; 2079 int i;
2078 2080
2079 for (i = 0; i < prog->aux->used_map_cnt; i++) 2081 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
2080 if (prog->aux->used_maps[i] == (void *)addr) 2082 map = prog->aux->used_maps[i];
2081 return prog->aux->used_maps[i]; 2083 if (map == (void *)addr) {
2084 *type = BPF_PSEUDO_MAP_FD;
2085 return map;
2086 }
2087 if (!map->ops->map_direct_value_meta)
2088 continue;
2089 if (!map->ops->map_direct_value_meta(map, addr, off)) {
2090 *type = BPF_PSEUDO_MAP_VALUE;
2091 return map;
2092 }
2093 }
2094
2082 return NULL; 2095 return NULL;
2083} 2096}
2084 2097
@@ -2086,6 +2099,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
2086{ 2099{
2087 const struct bpf_map *map; 2100 const struct bpf_map *map;
2088 struct bpf_insn *insns; 2101 struct bpf_insn *insns;
2102 u32 off, type;
2089 u64 imm; 2103 u64 imm;
2090 int i; 2104 int i;
2091 2105
@@ -2113,11 +2127,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
2113 continue; 2127 continue;
2114 2128
2115 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 2129 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
2116 map = bpf_map_from_imm(prog, imm); 2130 map = bpf_map_from_imm(prog, imm, &off, &type);
2117 if (map) { 2131 if (map) {
2118 insns[i].src_reg = BPF_PSEUDO_MAP_FD; 2132 insns[i].src_reg = type;
2119 insns[i].imm = map->id; 2133 insns[i].imm = map->id;
2120 insns[i + 1].imm = 0; 2134 insns[i + 1].imm = off;
2121 continue; 2135 continue;
2122 } 2136 }
2123 } 2137 }