author     David S. Miller <davem@davemloft.net>    2018-01-16 22:42:14 -0500
committer  David S. Miller <davem@davemloft.net>    2018-01-16 22:42:14 -0500
commit     7018d1b3f20fb4308ed9bc577160cb8ffb79b62a (patch)
tree       b61a17c694d3cdc3490b190c35104b936bcc6638 /kernel/bpf/hashtab.c
parent     e7e70fa6784b48a811fdd4253c41fc7195300570 (diff)
parent     e8a9d9683c8a62f917c19e57f1618363fb9ed04e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2018-01-17
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Add initial BPF map offloading for the nfp driver. So far only
programs could be offloaded, without being able to access maps.
Offloaded programs are currently only allowed to perform map
lookups, and the control path is responsible for populating the
maps (a generic lookup-only program of that shape is sketched
below, after the quoted message). The BPF core infrastructure
along with the nfp implementation is provided, from Jakub.
2) Various follow-ups to Josef's BPF error injection work. More
specifically, that includes: properly check whether the error
injectable event is on function entry or not, remove the percpu
bpf_kprobe_override and instead compare the instruction pointer
with the original one, separate error injection from kprobes since
it is not limited to them, add injectable error types in order to
specify the expected type of failure, and last but not least also
support the kernel's fault injection framework, all from Masami.
3) Various misc improvements and cleanups to the libbpf Makefile.
That is, fix permissions when installing BPF header files, remove
unused variables and functions, and also install the libbpf.h
header, from Jesper.
4) When offloading to the nfp JIT and a BPF insn is unsupported by
the JIT, reject it right at verification time. Also fix libbpf
with regards to ELF section name matching by properly treating
the program type as a prefix. Both from Quentin.
5) Add -DPACKAGE to bpftool when including bfd.h for the disassembler.
This is needed, for example, when building libbfd from source, as
bpftool doesn't supply a config.h for bfd.h. Fix from Jiong.
6) xdp_convert_ctx_access() is simplified since it doesn't need to
set target size during verification, from Jesper.
7) Let bpftool properly recognize BPF_PROG_TYPE_CGROUP_DEVICE
program types, from Roman.
8) Various functions in BPF cpumap were not declared static, from Wei.
9) Fix a double semicolon in BPF samples, from Luis.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
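As a concrete illustration of the lookup-only restriction mentioned in item 1 of
the message above, here is a hypothetical BPF-C program in the 2018-era samples
style: the program only reads the map, and user space (the control path) is
expected to populate it. This is not the nfp-specific code from the series; the
map and program names are made up.

/* Hypothetical lookup-only XDP program: reads a hash map populated by
 * user space.  Map lookup is the one map operation the commit message
 * says an offloaded program may perform.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") counters = {
	.type        = BPF_MAP_TYPE_HASH,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(__u64),
	.max_entries = 256,
};

SEC("xdp")
int xdp_lookup_only(struct xdp_md *ctx)
{
	__u32 key = 0;
	__u64 *val;

	/* read-only access: no bpf_map_update_elem() from the program */
	val = bpf_map_lookup_elem(&counters, &key);
	if (!val)
		return XDP_PASS;

	return *val ? XDP_DROP : XDP_PASS;
}

char _license[] SEC("license") = "GPL";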
Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--   kernel/bpf/hashtab.c | 103
1 file changed, 59 insertions(+), 44 deletions(-)
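The hashtab.c part of this merge splits the attribute validation out of
htab_map_alloc() into a new htab_map_alloc_check() and exposes it as the
.map_alloc_check callback on each hash-map ops structure, so bad attributes can
be rejected before any memory is allocated (and checked independently for
offloaded maps). The sketch below shows the two-phase pattern this callback
enables, assuming the generic map-creation path is wired up by the rest of
Jakub's series; the function name and error handling here are illustrative only,
not the verbatim kernel code.

/* Illustrative sketch: validate attributes first via ->map_alloc_check(),
 * allocate second via ->map_alloc().
 */
static struct bpf_map *create_map_sketch(const struct bpf_map_ops *ops,
					 union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	if (ops->map_alloc_check) {
		/* pure attribute validation, nothing allocated yet */
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}

	/* the (potentially expensive) allocation happens only afterwards */
	map = ops->map_alloc(attr);
	return map;
}

The hashtab.c diff that introduces the callback follows.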
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3905d4bc5b80..b76828f23b49 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -227,7 +227,7 @@ static int alloc_extra_elems(struct bpf_htab *htab)
 }
 
 /* Called from syscall */
-static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+static int htab_map_alloc_check(union bpf_attr *attr)
 {
 	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
@@ -241,9 +241,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	int numa_node = bpf_map_attr_numa_node(attr);
-	struct bpf_htab *htab;
-	int err, i;
-	u64 cost;
 
 	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
@@ -254,40 +251,68 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		/* LRU implementation is much complicated than other
 		 * maps. Hence, limit to CAP_SYS_ADMIN for now.
 		 */
-		return ERR_PTR(-EPERM);
+		return -EPERM;
 
 	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
 		/* reserved bits should not be used */
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (!lru && percpu_lru)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (lru && !prealloc)
-		return ERR_PTR(-ENOTSUPP);
+		return -ENOTSUPP;
 
 	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
+
+	/* check sanity of attributes.
+	 * value_size == 0 may be allowed in the future to use map as a set
+	 */
+	if (attr->max_entries == 0 || attr->key_size == 0 ||
+	    attr->value_size == 0)
+		return -EINVAL;
+
+	if (attr->key_size > MAX_BPF_STACK)
+		/* eBPF programs initialize keys on stack, so they cannot be
+		 * larger than max stack size
+		 */
+		return -E2BIG;
+
+	if (attr->value_size >= KMALLOC_MAX_SIZE -
+	    MAX_BPF_STACK - sizeof(struct htab_elem))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements via bpf syscall. This check also makes
+		 * sure that the elem_size doesn't overflow and it's
+		 * kmalloc-able later in htab_map_update_elem()
+		 */
+		return -E2BIG;
+
+	return 0;
+}
+
+static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+{
+	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
+		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	/* percpu_lru means each cpu has its own LRU list.
+	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
+	 * the map's value itself is percpu. percpu_lru has
+	 * nothing to do with the map's value.
+	 */
+	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
+	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	struct bpf_htab *htab;
+	int err, i;
+	u64 cost;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
 		return ERR_PTR(-ENOMEM);
 
-	/* mandatory map attributes */
-	htab->map.map_type = attr->map_type;
-	htab->map.key_size = attr->key_size;
-	htab->map.value_size = attr->value_size;
-	htab->map.max_entries = attr->max_entries;
-	htab->map.map_flags = attr->map_flags;
-	htab->map.numa_node = numa_node;
-
-	/* check sanity of attributes.
-	 * value_size == 0 may be allowed in the future to use map as a set
-	 */
-	err = -EINVAL;
-	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
-	    htab->map.value_size == 0)
-		goto free_htab;
+	bpf_map_init_from_attr(&htab->map, attr);
 
 	if (percpu_lru) {
 		/* ensure each CPU's lru list has >=1 elements.
@@ -304,22 +329,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	/* hash table size must be power of 2 */
 	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
 
-	err = -E2BIG;
-	if (htab->map.key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		goto free_htab;
-
-	if (htab->map.value_size >= KMALLOC_MAX_SIZE -
-	    MAX_BPF_STACK - sizeof(struct htab_elem))
-		/* if value_size is bigger, the user space won't be able to
-		 * access the elements via bpf syscall. This check also makes
-		 * sure that the elem_size doesn't overflow and it's
-		 * kmalloc-able later in htab_map_update_elem()
-		 */
-		goto free_htab;
-
 	htab->elem_size = sizeof(struct htab_elem) +
 			  round_up(htab->map.key_size, 8);
 	if (percpu)
@@ -327,6 +336,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	else
 		htab->elem_size += round_up(htab->map.value_size, 8);
 
+	err = -E2BIG;
 	/* prevent zero size kmalloc and check for u32 overflow */
 	if (htab->n_buckets == 0 ||
 	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
@@ -1143,6 +1153,7 @@ static void htab_map_free(struct bpf_map *map)
 }
 
 const struct bpf_map_ops htab_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1153,6 +1164,7 @@ const struct bpf_map_ops htab_map_ops = {
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1236,6 +1248,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 }
 
 const struct bpf_map_ops htab_percpu_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1245,6 +1258,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 };
 
 const struct bpf_map_ops htab_lru_percpu_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1253,11 +1267,11 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 };
 
-static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+static int fd_htab_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->value_size != sizeof(u32))
-		return ERR_PTR(-EINVAL);
-	return htab_map_alloc(attr);
+		return -EINVAL;
+	return htab_map_alloc_check(attr);
 }
 
 static void fd_htab_map_free(struct bpf_map *map)
@@ -1328,7 +1342,7 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
 	if (IS_ERR(inner_map_meta))
 		return inner_map_meta;
 
-	map = fd_htab_map_alloc(attr);
+	map = htab_map_alloc(attr);
 	if (IS_ERR(map)) {
 		bpf_map_meta_free(inner_map_meta);
 		return map;
@@ -1372,6 +1386,7 @@ static void htab_of_map_free(struct bpf_map *map)
 }
 
 const struct bpf_map_ops htab_of_maps_map_ops = {
+	.map_alloc_check = fd_htab_map_alloc_check,
 	.map_alloc = htab_of_map_alloc,
 	.map_free = htab_of_map_free,
 	.map_get_next_key = htab_map_get_next_key,
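For completeness, the attributes that htab_map_alloc_check() validates are the
ones user space passes to the bpf(2) BPF_MAP_CREATE command. Below is a minimal,
hypothetical userspace sketch (error handling omitted, function name made up)
that exercises those checks.

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Create a plain hash map via the raw bpf(2) syscall.  The key_size,
 * value_size, max_entries and map_flags fields are the attributes that
 * htab_map_alloc_check() above rejects before any allocation happens.
 */
static int create_hash_map(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = sizeof(__u32);	/* must be non-zero and <= MAX_BPF_STACK */
	attr.value_size  = sizeof(__u64);	/* must be non-zero and below the kmalloc bound */
	attr.max_entries = 1024;		/* must be non-zero */
	attr.map_flags   = 0;			/* only bits in HTAB_CREATE_FLAG_MASK are accepted */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

On success the call returns a new map file descriptor; with this series, attribute
errors such as key_size == 0 are reported without the kernel allocating the map first.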