author     David S. Miller <davem@davemloft.net>  2019-04-11 20:00:05 -0400
committer  David S. Miller <davem@davemloft.net>  2019-04-11 20:00:05 -0400
commit     bb23581b9b38703257acabd520aa5ebf1db008af (patch)
tree       9d9f7b7ccad9697dfd2eab3b1fda37056b0e47f6 /kernel/bpf
parent     78f07adac86186b5ef0318b7faec377b6d31ea9f (diff)
parent     947e8b595b82d3551750641445d0a97b8f29b536 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2019-04-12

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Improve BPF verifier scalability for large programs through two
   optimizations: i) remove verifier states that are not useful in pruning,
   ii) stop walking parentage chain once first LIVE_READ is seen. Combined
   gives approx 20x speedup. Increase limits for accepting large programs
   under root, and add various stress tests, from Alexei.

2) Implement global data support in BPF. This enables static global
   variables for .data, .rodata and .bss sections to be properly handled
   which allows for more natural program development. This also opens up
   the possibility to optimize program workflow by compiling ELFs only
   once and later only rewriting section data before reload, from Daniel
   and with test cases and libbpf refactoring from Joe.

3) Add config option to generate BTF type info for vmlinux as part of the
   kernel build process. DWARF debug info is converted via pahole to BTF.
   Latter relies on libbpf and makes use of BTF deduplication algorithm
   which results in 100x savings compared to DWARF data. Resulting .BTF
   section is typically about 2MB in size, from Andrii.

4) Add BPF verifier support for stack access with variable offset from
   helpers and add various test cases along with it, from Andrey.

5) Extend bpf_skb_adjust_room() growth BPF helper to mark inner MAC header
   so that L2 encapsulation can be used for tc tunnels, from Alan.

6) Add support for input __sk_buff context in BPF_PROG_TEST_RUN so that
   users can define a subset of allowed __sk_buff fields that get fed into
   the test program, from Stanislav.

7) Add bpf fs multi-dimensional array tests for BTF test suite and fix up
   various UBSAN warnings in bpftool, from Yonghong.

8) Generate a pkg-config file for libbpf, from Luca.

9) Dump program's BTF id in bpftool, from Prashant.

10) libbpf fix to use smaller BPF log buffer size for AF_XDP's XDP program,
    from Magnus.

11) kallsyms related fixes for the case when symbols are not present in
    BPF selftests and samples, from Daniel
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
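To make item 2) above concrete: a hedged sketch (not part of this series) of a tc/classifier BPF program using static globals once the kernel, LLVM and libbpf pieces are in place. SEC() comes from the bpf_helpers.h header used by samples/selftests; the section name and all identifiers are made up for illustration.

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* SEC() macro, as shipped with samples/selftests */

/* Static globals land in .bss/.data/.rodata and are backed by
 * single-entry array maps; no bpf_map_lookup_elem() call is needed.
 */
static __u64 pkt_count;			/* .bss    */
static const __u32 sample_rate = 64;	/* .rodata */

SEC("classifier")
int count_packets(struct __sk_buff *skb)
{
	pkt_count++;	/* compiled into BPF_PSEUDO_MAP_VALUE loads/stores */
	return pkt_count % sample_rate == 0;
}

char _license[] SEC("license") = "GPL";

Each such section becomes one single-entry BPF_MAP_TYPE_ARRAY under the hood, which is what the arraymap.c and verifier.c changes below operate on.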
Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/arraymap.c          |  53
-rw-r--r--  kernel/bpf/btf.c               | 419
-rw-r--r--  kernel/bpf/core.c              |  14
-rw-r--r--  kernel/bpf/disasm.c            |   5
-rw-r--r--  kernel/bpf/hashtab.c           |   6
-rw-r--r--  kernel/bpf/local_storage.c     |   6
-rw-r--r--  kernel/bpf/lpm_trie.c          |   3
-rw-r--r--  kernel/bpf/queue_stack_maps.c  |   6
-rw-r--r--  kernel/bpf/syscall.c           | 144
-rw-r--r--  kernel/bpf/verifier.c          | 397
10 files changed, 903 insertions, 150 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c72e0d8e1e65..584636c9e2eb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -22,7 +22,7 @@
22#include "map_in_map.h" 22#include "map_in_map.h"
23 23
24#define ARRAY_CREATE_FLAG_MASK \ 24#define ARRAY_CREATE_FLAG_MASK \
25 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 25 (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
26 26
27static void bpf_array_free_percpu(struct bpf_array *array) 27static void bpf_array_free_percpu(struct bpf_array *array)
28{ 28{
@@ -63,6 +63,7 @@ int array_map_alloc_check(union bpf_attr *attr)
63 if (attr->max_entries == 0 || attr->key_size != 4 || 63 if (attr->max_entries == 0 || attr->key_size != 4 ||
64 attr->value_size == 0 || 64 attr->value_size == 0 ||
65 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 65 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
66 !bpf_map_flags_access_ok(attr->map_flags) ||
66 (percpu && numa_node != NUMA_NO_NODE)) 67 (percpu && numa_node != NUMA_NO_NODE))
67 return -EINVAL; 68 return -EINVAL;
68 69
@@ -160,6 +161,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
160 return array->value + array->elem_size * (index & array->index_mask); 161 return array->value + array->elem_size * (index & array->index_mask);
161} 162}
162 163
164static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
165 u32 off)
166{
167 struct bpf_array *array = container_of(map, struct bpf_array, map);
168
169 if (map->max_entries != 1)
170 return -ENOTSUPP;
171 if (off >= map->value_size)
172 return -EINVAL;
173
174 *imm = (unsigned long)array->value;
175 return 0;
176}
177
178static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
179 u32 *off)
180{
181 struct bpf_array *array = container_of(map, struct bpf_array, map);
182 u64 base = (unsigned long)array->value;
183 u64 range = array->elem_size;
184
185 if (map->max_entries != 1)
186 return -ENOTSUPP;
187 if (imm < base || imm >= base + range)
188 return -ENOENT;
189
190 *off = imm - base;
191 return 0;
192}
193
163/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ 194/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
164static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 195static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
165{ 196{
@@ -360,7 +391,8 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
360 return; 391 return;
361 } 392 }
362 393
363 seq_printf(m, "%u: ", *(u32 *)key); 394 if (map->btf_key_type_id)
395 seq_printf(m, "%u: ", *(u32 *)key);
364 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 396 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
365 seq_puts(m, "\n"); 397 seq_puts(m, "\n");
366 398
@@ -397,6 +429,18 @@ static int array_map_check_btf(const struct bpf_map *map,
397{ 429{
398 u32 int_data; 430 u32 int_data;
399 431
432 /* One exception for keyless BTF: .bss/.data/.rodata map */
433 if (btf_type_is_void(key_type)) {
434 if (map->map_type != BPF_MAP_TYPE_ARRAY ||
435 map->max_entries != 1)
436 return -EINVAL;
437
438 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
439 return -EINVAL;
440
441 return 0;
442 }
443
400 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) 444 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
401 return -EINVAL; 445 return -EINVAL;
402 446
@@ -419,6 +463,8 @@ const struct bpf_map_ops array_map_ops = {
419 .map_update_elem = array_map_update_elem, 463 .map_update_elem = array_map_update_elem,
420 .map_delete_elem = array_map_delete_elem, 464 .map_delete_elem = array_map_delete_elem,
421 .map_gen_lookup = array_map_gen_lookup, 465 .map_gen_lookup = array_map_gen_lookup,
466 .map_direct_value_addr = array_map_direct_value_addr,
467 .map_direct_value_meta = array_map_direct_value_meta,
422 .map_seq_show_elem = array_map_seq_show_elem, 468 .map_seq_show_elem = array_map_seq_show_elem,
423 .map_check_btf = array_map_check_btf, 469 .map_check_btf = array_map_check_btf,
424}; 470};
@@ -440,6 +486,9 @@ static int fd_array_map_alloc_check(union bpf_attr *attr)
440 /* only file descriptors can be stored in this type of map */ 486 /* only file descriptors can be stored in this type of map */
441 if (attr->value_size != sizeof(u32)) 487 if (attr->value_size != sizeof(u32))
442 return -EINVAL; 488 return -EINVAL;
489 /* Program read-only/write-only not supported for special maps yet. */
490 if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
491 return -EINVAL;
443 return array_map_alloc_check(attr); 492 return array_map_alloc_check(attr);
444} 493}
445 494
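The two map_direct_value_{addr,meta} callbacks added above back the new BPF_PSEUDO_MAP_VALUE loads that core.c, disasm.c and syscall.c below refer to. A hedged sketch of the loader-side encoding (register choice and offset are illustrative; the fd/offset split mirrors the dump path in syscall.c further down): the first half of the ld_imm64 carries the fd of the single-entry section map, the second half the byte offset of the global inside the map value.

#include <linux/bpf.h>

static void emit_global_addr_load(struct bpf_insn insn[2], int data_map_fd, __u32 off)
{
	insn[0] = (struct bpf_insn) {
		.code    = BPF_LD | BPF_DW | BPF_IMM,
		.dst_reg = BPF_REG_2,
		.src_reg = BPF_PSEUDO_MAP_VALUE,	/* not BPF_PSEUDO_MAP_FD */
		.imm     = data_map_fd,
	};
	insn[1] = (struct bpf_insn) {
		.imm     = off,		/* offset resolved via ->map_direct_value_addr() */
	};
}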
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index bd3921b1514b..cad09858a5f2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -185,6 +185,16 @@
185 i < btf_type_vlen(struct_type); \ 185 i < btf_type_vlen(struct_type); \
186 i++, member++) 186 i++, member++)
187 187
188#define for_each_vsi(i, struct_type, member) \
189 for (i = 0, member = btf_type_var_secinfo(struct_type); \
190 i < btf_type_vlen(struct_type); \
191 i++, member++)
192
193#define for_each_vsi_from(i, from, struct_type, member) \
194 for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
195 i < btf_type_vlen(struct_type); \
196 i++, member++)
197
188static DEFINE_IDR(btf_idr); 198static DEFINE_IDR(btf_idr);
189static DEFINE_SPINLOCK(btf_idr_lock); 199static DEFINE_SPINLOCK(btf_idr_lock);
190 200
@@ -262,6 +272,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
262 [BTF_KIND_RESTRICT] = "RESTRICT", 272 [BTF_KIND_RESTRICT] = "RESTRICT",
263 [BTF_KIND_FUNC] = "FUNC", 273 [BTF_KIND_FUNC] = "FUNC",
264 [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", 274 [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
275 [BTF_KIND_VAR] = "VAR",
276 [BTF_KIND_DATASEC] = "DATASEC",
265}; 277};
266 278
267struct btf_kind_operations { 279struct btf_kind_operations {
@@ -314,7 +326,7 @@ static bool btf_type_is_modifier(const struct btf_type *t)
314 return false; 326 return false;
315} 327}
316 328
317static bool btf_type_is_void(const struct btf_type *t) 329bool btf_type_is_void(const struct btf_type *t)
318{ 330{
319 return t == &btf_void; 331 return t == &btf_void;
320} 332}
@@ -375,13 +387,36 @@ static bool btf_type_is_int(const struct btf_type *t)
375 return BTF_INFO_KIND(t->info) == BTF_KIND_INT; 387 return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
376} 388}
377 389
390static bool btf_type_is_var(const struct btf_type *t)
391{
392 return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
393}
394
395static bool btf_type_is_datasec(const struct btf_type *t)
396{
397 return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
398}
399
400/* Types that act only as a source, not sink or intermediate
401 * type when resolving.
402 */
403static bool btf_type_is_resolve_source_only(const struct btf_type *t)
404{
405 return btf_type_is_var(t) ||
406 btf_type_is_datasec(t);
407}
408
378/* What types need to be resolved? 409/* What types need to be resolved?
379 * 410 *
380 * btf_type_is_modifier() is an obvious one. 411 * btf_type_is_modifier() is an obvious one.
381 * 412 *
382 * btf_type_is_struct() because its member refers to 413 * btf_type_is_struct() because its member refers to
383 * another type (through member->type). 414 * another type (through member->type).
384 415 *
416 * btf_type_is_var() because the variable refers to
417 * another type. btf_type_is_datasec() holds multiple
418 * btf_type_is_var() types that need resolving.
419 *
385 * btf_type_is_array() because its element (array->type) 420 * btf_type_is_array() because its element (array->type)
386 * refers to another type. Array can be thought of a 421 * refers to another type. Array can be thought of a
387 * special case of struct while array just has the same 422 * special case of struct while array just has the same
@@ -390,9 +425,11 @@ static bool btf_type_is_int(const struct btf_type *t)
390static bool btf_type_needs_resolve(const struct btf_type *t) 425static bool btf_type_needs_resolve(const struct btf_type *t)
391{ 426{
392 return btf_type_is_modifier(t) || 427 return btf_type_is_modifier(t) ||
393 btf_type_is_ptr(t) || 428 btf_type_is_ptr(t) ||
394 btf_type_is_struct(t) || 429 btf_type_is_struct(t) ||
395 btf_type_is_array(t); 430 btf_type_is_array(t) ||
431 btf_type_is_var(t) ||
432 btf_type_is_datasec(t);
396} 433}
397 434
398/* t->size can be used */ 435/* t->size can be used */
@@ -403,6 +440,7 @@ static bool btf_type_has_size(const struct btf_type *t)
403 case BTF_KIND_STRUCT: 440 case BTF_KIND_STRUCT:
404 case BTF_KIND_UNION: 441 case BTF_KIND_UNION:
405 case BTF_KIND_ENUM: 442 case BTF_KIND_ENUM:
443 case BTF_KIND_DATASEC:
406 return true; 444 return true;
407 } 445 }
408 446
@@ -467,6 +505,16 @@ static const struct btf_enum *btf_type_enum(const struct btf_type *t)
467 return (const struct btf_enum *)(t + 1); 505 return (const struct btf_enum *)(t + 1);
468} 506}
469 507
508static const struct btf_var *btf_type_var(const struct btf_type *t)
509{
510 return (const struct btf_var *)(t + 1);
511}
512
513static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
514{
515 return (const struct btf_var_secinfo *)(t + 1);
516}
517
470static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) 518static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
471{ 519{
472 return kind_ops[BTF_INFO_KIND(t->info)]; 520 return kind_ops[BTF_INFO_KIND(t->info)];
@@ -478,23 +526,31 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
478 offset < btf->hdr.str_len; 526 offset < btf->hdr.str_len;
479} 527}
480 528
481/* Only C-style identifier is permitted. This can be relaxed if 529static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
482 * necessary. 530{
483 */ 531 if ((first ? !isalpha(c) :
484static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) 532 !isalnum(c)) &&
533 c != '_' &&
534 ((c == '.' && !dot_ok) ||
535 c != '.'))
536 return false;
537 return true;
538}
539
540static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
485{ 541{
486 /* offset must be valid */ 542 /* offset must be valid */
487 const char *src = &btf->strings[offset]; 543 const char *src = &btf->strings[offset];
488 const char *src_limit; 544 const char *src_limit;
489 545
490 if (!isalpha(*src) && *src != '_') 546 if (!__btf_name_char_ok(*src, true, dot_ok))
491 return false; 547 return false;
492 548
493 /* set a limit on identifier length */ 549 /* set a limit on identifier length */
494 src_limit = src + KSYM_NAME_LEN; 550 src_limit = src + KSYM_NAME_LEN;
495 src++; 551 src++;
496 while (*src && src < src_limit) { 552 while (*src && src < src_limit) {
497 if (!isalnum(*src) && *src != '_') 553 if (!__btf_name_char_ok(*src, false, dot_ok))
498 return false; 554 return false;
499 src++; 555 src++;
500 } 556 }
@@ -502,6 +558,19 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
502 return !*src; 558 return !*src;
503} 559}
504 560
561/* Only C-style identifier is permitted. This can be relaxed if
562 * necessary.
563 */
564static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
565{
566 return __btf_name_valid(btf, offset, false);
567}
568
569static bool btf_name_valid_section(const struct btf *btf, u32 offset)
570{
571 return __btf_name_valid(btf, offset, true);
572}
573
505static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) 574static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
506{ 575{
507 if (!offset) 576 if (!offset)
@@ -697,6 +766,32 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
697 __btf_verifier_log(log, "\n"); 766 __btf_verifier_log(log, "\n");
698} 767}
699 768
769__printf(4, 5)
770static void btf_verifier_log_vsi(struct btf_verifier_env *env,
771 const struct btf_type *datasec_type,
772 const struct btf_var_secinfo *vsi,
773 const char *fmt, ...)
774{
775 struct bpf_verifier_log *log = &env->log;
776 va_list args;
777
778 if (!bpf_verifier_log_needed(log))
779 return;
780 if (env->phase != CHECK_META)
781 btf_verifier_log_type(env, datasec_type, NULL);
782
783 __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
784 vsi->type, vsi->offset, vsi->size);
785 if (fmt && *fmt) {
786 __btf_verifier_log(log, " ");
787 va_start(args, fmt);
788 bpf_verifier_vlog(log, fmt, args);
789 va_end(args);
790 }
791
792 __btf_verifier_log(log, "\n");
793}
794
700static void btf_verifier_log_hdr(struct btf_verifier_env *env, 795static void btf_verifier_log_hdr(struct btf_verifier_env *env,
701 u32 btf_data_size) 796 u32 btf_data_size)
702{ 797{
@@ -974,7 +1069,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
974 } else if (btf_type_is_ptr(size_type)) { 1069 } else if (btf_type_is_ptr(size_type)) {
975 size = sizeof(void *); 1070 size = sizeof(void *);
976 } else { 1071 } else {
977 if (WARN_ON_ONCE(!btf_type_is_modifier(size_type))) 1072 if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
1073 !btf_type_is_var(size_type)))
978 return NULL; 1074 return NULL;
979 1075
980 size = btf->resolved_sizes[size_type_id]; 1076 size = btf->resolved_sizes[size_type_id];
@@ -1509,7 +1605,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
1509 u32 next_type_size = 0; 1605 u32 next_type_size = 0;
1510 1606
1511 next_type = btf_type_by_id(btf, next_type_id); 1607 next_type = btf_type_by_id(btf, next_type_id);
1512 if (!next_type) { 1608 if (!next_type || btf_type_is_resolve_source_only(next_type)) {
1513 btf_verifier_log_type(env, v->t, "Invalid type_id"); 1609 btf_verifier_log_type(env, v->t, "Invalid type_id");
1514 return -EINVAL; 1610 return -EINVAL;
1515 } 1611 }
@@ -1542,6 +1638,53 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
1542 return 0; 1638 return 0;
1543} 1639}
1544 1640
1641static int btf_var_resolve(struct btf_verifier_env *env,
1642 const struct resolve_vertex *v)
1643{
1644 const struct btf_type *next_type;
1645 const struct btf_type *t = v->t;
1646 u32 next_type_id = t->type;
1647 struct btf *btf = env->btf;
1648 u32 next_type_size;
1649
1650 next_type = btf_type_by_id(btf, next_type_id);
1651 if (!next_type || btf_type_is_resolve_source_only(next_type)) {
1652 btf_verifier_log_type(env, v->t, "Invalid type_id");
1653 return -EINVAL;
1654 }
1655
1656 if (!env_type_is_resolve_sink(env, next_type) &&
1657 !env_type_is_resolved(env, next_type_id))
1658 return env_stack_push(env, next_type, next_type_id);
1659
1660 if (btf_type_is_modifier(next_type)) {
1661 const struct btf_type *resolved_type;
1662 u32 resolved_type_id;
1663
1664 resolved_type_id = next_type_id;
1665 resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
1666
1667 if (btf_type_is_ptr(resolved_type) &&
1668 !env_type_is_resolve_sink(env, resolved_type) &&
1669 !env_type_is_resolved(env, resolved_type_id))
1670 return env_stack_push(env, resolved_type,
1671 resolved_type_id);
1672 }
1673
1674 /* We must resolve to something concrete at this point, no
1675 * forward types or similar that would resolve to size of
1676 * zero is allowed.
1677 */
1678 if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
1679 btf_verifier_log_type(env, v->t, "Invalid type_id");
1680 return -EINVAL;
1681 }
1682
1683 env_stack_pop_resolved(env, next_type_id, next_type_size);
1684
1685 return 0;
1686}
1687
1545static int btf_ptr_resolve(struct btf_verifier_env *env, 1688static int btf_ptr_resolve(struct btf_verifier_env *env,
1546 const struct resolve_vertex *v) 1689 const struct resolve_vertex *v)
1547{ 1690{
@@ -1551,7 +1694,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
1551 struct btf *btf = env->btf; 1694 struct btf *btf = env->btf;
1552 1695
1553 next_type = btf_type_by_id(btf, next_type_id); 1696 next_type = btf_type_by_id(btf, next_type_id);
1554 if (!next_type) { 1697 if (!next_type || btf_type_is_resolve_source_only(next_type)) {
1555 btf_verifier_log_type(env, v->t, "Invalid type_id"); 1698 btf_verifier_log_type(env, v->t, "Invalid type_id");
1556 return -EINVAL; 1699 return -EINVAL;
1557 } 1700 }
@@ -1609,6 +1752,15 @@ static void btf_modifier_seq_show(const struct btf *btf,
1609 btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); 1752 btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
1610} 1753}
1611 1754
1755static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t,
1756 u32 type_id, void *data, u8 bits_offset,
1757 struct seq_file *m)
1758{
1759 t = btf_type_id_resolve(btf, &type_id);
1760
1761 btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
1762}
1763
1612static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, 1764static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
1613 u32 type_id, void *data, u8 bits_offset, 1765 u32 type_id, void *data, u8 bits_offset,
1614 struct seq_file *m) 1766 struct seq_file *m)
@@ -1776,7 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
1776 /* Check array->index_type */ 1928 /* Check array->index_type */
1777 index_type_id = array->index_type; 1929 index_type_id = array->index_type;
1778 index_type = btf_type_by_id(btf, index_type_id); 1930 index_type = btf_type_by_id(btf, index_type_id);
1779 if (btf_type_nosize_or_null(index_type)) { 1931 if (btf_type_is_resolve_source_only(index_type) ||
1932 btf_type_nosize_or_null(index_type)) {
1780 btf_verifier_log_type(env, v->t, "Invalid index"); 1933 btf_verifier_log_type(env, v->t, "Invalid index");
1781 return -EINVAL; 1934 return -EINVAL;
1782 } 1935 }
@@ -1795,7 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
1795 /* Check array->type */ 1948 /* Check array->type */
1796 elem_type_id = array->type; 1949 elem_type_id = array->type;
1797 elem_type = btf_type_by_id(btf, elem_type_id); 1950 elem_type = btf_type_by_id(btf, elem_type_id);
1798 if (btf_type_nosize_or_null(elem_type)) { 1951 if (btf_type_is_resolve_source_only(elem_type) ||
1952 btf_type_nosize_or_null(elem_type)) {
1799 btf_verifier_log_type(env, v->t, 1953 btf_verifier_log_type(env, v->t,
1800 "Invalid elem"); 1954 "Invalid elem");
1801 return -EINVAL; 1955 return -EINVAL;
@@ -2016,7 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
2016 const struct btf_type *member_type = btf_type_by_id(env->btf, 2170 const struct btf_type *member_type = btf_type_by_id(env->btf,
2017 member_type_id); 2171 member_type_id);
2018 2172
2019 if (btf_type_nosize_or_null(member_type)) { 2173 if (btf_type_is_resolve_source_only(member_type) ||
2174 btf_type_nosize_or_null(member_type)) {
2020 btf_verifier_log_member(env, v->t, member, 2175 btf_verifier_log_member(env, v->t, member,
2021 "Invalid member"); 2176 "Invalid member");
2022 return -EINVAL; 2177 return -EINVAL;
@@ -2411,6 +2566,222 @@ static struct btf_kind_operations func_ops = {
2411 .seq_show = btf_df_seq_show, 2566 .seq_show = btf_df_seq_show,
2412}; 2567};
2413 2568
2569static s32 btf_var_check_meta(struct btf_verifier_env *env,
2570 const struct btf_type *t,
2571 u32 meta_left)
2572{
2573 const struct btf_var *var;
2574 u32 meta_needed = sizeof(*var);
2575
2576 if (meta_left < meta_needed) {
2577 btf_verifier_log_basic(env, t,
2578 "meta_left:%u meta_needed:%u",
2579 meta_left, meta_needed);
2580 return -EINVAL;
2581 }
2582
2583 if (btf_type_vlen(t)) {
2584 btf_verifier_log_type(env, t, "vlen != 0");
2585 return -EINVAL;
2586 }
2587
2588 if (btf_type_kflag(t)) {
2589 btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
2590 return -EINVAL;
2591 }
2592
2593 if (!t->name_off ||
2594 !__btf_name_valid(env->btf, t->name_off, true)) {
2595 btf_verifier_log_type(env, t, "Invalid name");
2596 return -EINVAL;
2597 }
2598
2599 /* A var cannot be in type void */
2600 if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
2601 btf_verifier_log_type(env, t, "Invalid type_id");
2602 return -EINVAL;
2603 }
2604
2605 var = btf_type_var(t);
2606 if (var->linkage != BTF_VAR_STATIC &&
2607 var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2608 btf_verifier_log_type(env, t, "Linkage not supported");
2609 return -EINVAL;
2610 }
2611
2612 btf_verifier_log_type(env, t, NULL);
2613
2614 return meta_needed;
2615}
2616
2617static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
2618{
2619 const struct btf_var *var = btf_type_var(t);
2620
2621 btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
2622}
2623
2624static const struct btf_kind_operations var_ops = {
2625 .check_meta = btf_var_check_meta,
2626 .resolve = btf_var_resolve,
2627 .check_member = btf_df_check_member,
2628 .check_kflag_member = btf_df_check_kflag_member,
2629 .log_details = btf_var_log,
2630 .seq_show = btf_var_seq_show,
2631};
2632
2633static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
2634 const struct btf_type *t,
2635 u32 meta_left)
2636{
2637 const struct btf_var_secinfo *vsi;
2638 u64 last_vsi_end_off = 0, sum = 0;
2639 u32 i, meta_needed;
2640
2641 meta_needed = btf_type_vlen(t) * sizeof(*vsi);
2642 if (meta_left < meta_needed) {
2643 btf_verifier_log_basic(env, t,
2644 "meta_left:%u meta_needed:%u",
2645 meta_left, meta_needed);
2646 return -EINVAL;
2647 }
2648
2649 if (!btf_type_vlen(t)) {
2650 btf_verifier_log_type(env, t, "vlen == 0");
2651 return -EINVAL;
2652 }
2653
2654 if (!t->size) {
2655 btf_verifier_log_type(env, t, "size == 0");
2656 return -EINVAL;
2657 }
2658
2659 if (btf_type_kflag(t)) {
2660 btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
2661 return -EINVAL;
2662 }
2663
2664 if (!t->name_off ||
2665 !btf_name_valid_section(env->btf, t->name_off)) {
2666 btf_verifier_log_type(env, t, "Invalid name");
2667 return -EINVAL;
2668 }
2669
2670 btf_verifier_log_type(env, t, NULL);
2671
2672 for_each_vsi(i, t, vsi) {
2673 /* A var cannot be in type void */
2674 if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
2675 btf_verifier_log_vsi(env, t, vsi,
2676 "Invalid type_id");
2677 return -EINVAL;
2678 }
2679
2680 if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
2681 btf_verifier_log_vsi(env, t, vsi,
2682 "Invalid offset");
2683 return -EINVAL;
2684 }
2685
2686 if (!vsi->size || vsi->size > t->size) {
2687 btf_verifier_log_vsi(env, t, vsi,
2688 "Invalid size");
2689 return -EINVAL;
2690 }
2691
2692 last_vsi_end_off = vsi->offset + vsi->size;
2693 if (last_vsi_end_off > t->size) {
2694 btf_verifier_log_vsi(env, t, vsi,
2695 "Invalid offset+size");
2696 return -EINVAL;
2697 }
2698
2699 btf_verifier_log_vsi(env, t, vsi, NULL);
2700 sum += vsi->size;
2701 }
2702
2703 if (t->size < sum) {
2704 btf_verifier_log_type(env, t, "Invalid btf_info size");
2705 return -EINVAL;
2706 }
2707
2708 return meta_needed;
2709}
2710
2711static int btf_datasec_resolve(struct btf_verifier_env *env,
2712 const struct resolve_vertex *v)
2713{
2714 const struct btf_var_secinfo *vsi;
2715 struct btf *btf = env->btf;
2716 u16 i;
2717
2718 for_each_vsi_from(i, v->next_member, v->t, vsi) {
2719 u32 var_type_id = vsi->type, type_id, type_size = 0;
2720 const struct btf_type *var_type = btf_type_by_id(env->btf,
2721 var_type_id);
2722 if (!var_type || !btf_type_is_var(var_type)) {
2723 btf_verifier_log_vsi(env, v->t, vsi,
2724 "Not a VAR kind member");
2725 return -EINVAL;
2726 }
2727
2728 if (!env_type_is_resolve_sink(env, var_type) &&
2729 !env_type_is_resolved(env, var_type_id)) {
2730 env_stack_set_next_member(env, i + 1);
2731 return env_stack_push(env, var_type, var_type_id);
2732 }
2733
2734 type_id = var_type->type;
2735 if (!btf_type_id_size(btf, &type_id, &type_size)) {
2736 btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
2737 return -EINVAL;
2738 }
2739
2740 if (vsi->size < type_size) {
2741 btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
2742 return -EINVAL;
2743 }
2744 }
2745
2746 env_stack_pop_resolved(env, 0, 0);
2747 return 0;
2748}
2749
2750static void btf_datasec_log(struct btf_verifier_env *env,
2751 const struct btf_type *t)
2752{
2753 btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
2754}
2755
2756static void btf_datasec_seq_show(const struct btf *btf,
2757 const struct btf_type *t, u32 type_id,
2758 void *data, u8 bits_offset,
2759 struct seq_file *m)
2760{
2761 const struct btf_var_secinfo *vsi;
2762 const struct btf_type *var;
2763 u32 i;
2764
2765 seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off));
2766 for_each_vsi(i, t, vsi) {
2767 var = btf_type_by_id(btf, vsi->type);
2768 if (i)
2769 seq_puts(m, ",");
2770 btf_type_ops(var)->seq_show(btf, var, vsi->type,
2771 data + vsi->offset, bits_offset, m);
2772 }
2773 seq_puts(m, "}");
2774}
2775
2776static const struct btf_kind_operations datasec_ops = {
2777 .check_meta = btf_datasec_check_meta,
2778 .resolve = btf_datasec_resolve,
2779 .check_member = btf_df_check_member,
2780 .check_kflag_member = btf_df_check_kflag_member,
2781 .log_details = btf_datasec_log,
2782 .seq_show = btf_datasec_seq_show,
2783};
2784
2414static int btf_func_proto_check(struct btf_verifier_env *env, 2785static int btf_func_proto_check(struct btf_verifier_env *env,
2415 const struct btf_type *t) 2786 const struct btf_type *t)
2416{ 2787{
@@ -2542,6 +2913,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
2542 [BTF_KIND_RESTRICT] = &modifier_ops, 2913 [BTF_KIND_RESTRICT] = &modifier_ops,
2543 [BTF_KIND_FUNC] = &func_ops, 2914 [BTF_KIND_FUNC] = &func_ops,
2544 [BTF_KIND_FUNC_PROTO] = &func_proto_ops, 2915 [BTF_KIND_FUNC_PROTO] = &func_proto_ops,
2916 [BTF_KIND_VAR] = &var_ops,
2917 [BTF_KIND_DATASEC] = &datasec_ops,
2545}; 2918};
2546 2919
2547static s32 btf_check_meta(struct btf_verifier_env *env, 2920static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -2622,13 +2995,17 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
2622 if (!env_type_is_resolved(env, type_id)) 2995 if (!env_type_is_resolved(env, type_id))
2623 return false; 2996 return false;
2624 2997
2625 if (btf_type_is_struct(t)) 2998 if (btf_type_is_struct(t) || btf_type_is_datasec(t))
2626 return !btf->resolved_ids[type_id] && 2999 return !btf->resolved_ids[type_id] &&
2627 !btf->resolved_sizes[type_id]; 3000 !btf->resolved_sizes[type_id];
2628 3001
2629 if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) { 3002 if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
3003 btf_type_is_var(t)) {
2630 t = btf_type_id_resolve(btf, &type_id); 3004 t = btf_type_id_resolve(btf, &type_id);
2631 return t && !btf_type_is_modifier(t); 3005 return t &&
3006 !btf_type_is_modifier(t) &&
3007 !btf_type_is_var(t) &&
3008 !btf_type_is_datasec(t);
2632 } 3009 }
2633 3010
2634 if (btf_type_is_array(t)) { 3011 if (btf_type_is_array(t)) {
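For reference, the VAR/DATASEC layout that btf_datasec_check_meta() and for_each_vsi() above operate on can be walked from user space, too. A minimal sketch, assuming only the UAPI definitions added by this series (the btf_var_secinfo entries follow the DATASEC's struct btf_type header, one per vlen):

#include <stdio.h>
#include <linux/btf.h>

static void dump_datasec(const struct btf_type *t)
{
	const struct btf_var_secinfo *vsi = (const struct btf_var_secinfo *)(t + 1);
	unsigned int i, vlen = BTF_INFO_VLEN(t->info);

	/* t->size is the section size; each entry names a VAR type plus
	 * its offset/size within the section, as validated above.
	 */
	for (i = 0; i < vlen; i++, vsi++)
		printf("var type_id=%u offset=%u size=%u\n",
		       vsi->type, vsi->offset, vsi->size);
}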
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ff09d32a8a1b..ace8c22c8b0e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -292,7 +292,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
292 dst[i] = fp->insnsi[i]; 292 dst[i] = fp->insnsi[i];
293 if (!was_ld_map && 293 if (!was_ld_map &&
294 dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) && 294 dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
295 dst[i].src_reg == BPF_PSEUDO_MAP_FD) { 295 (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
296 dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
296 was_ld_map = true; 297 was_ld_map = true;
297 dst[i].imm = 0; 298 dst[i].imm = 0;
298 } else if (was_ld_map && 299 } else if (was_ld_map &&
@@ -438,6 +439,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
438 u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; 439 u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
439 const u32 cnt_max = S16_MAX; 440 const u32 cnt_max = S16_MAX;
440 struct bpf_prog *prog_adj; 441 struct bpf_prog *prog_adj;
442 int err;
441 443
442 /* Since our patchlet doesn't expand the image, we're done. */ 444 /* Since our patchlet doesn't expand the image, we're done. */
443 if (insn_delta == 0) { 445 if (insn_delta == 0) {
@@ -453,8 +455,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
453 * we afterwards may not fail anymore. 455 * we afterwards may not fail anymore.
454 */ 456 */
455 if (insn_adj_cnt > cnt_max && 457 if (insn_adj_cnt > cnt_max &&
456 bpf_adj_branches(prog, off, off + 1, off + len, true)) 458 (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
457 return NULL; 459 return ERR_PTR(err);
458 460
459 /* Several new instructions need to be inserted. Make room 461 /* Several new instructions need to be inserted. Make room
460 * for them. Likely, there's no need for a new allocation as 462 * for them. Likely, there's no need for a new allocation as
@@ -463,7 +465,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
463 prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), 465 prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
464 GFP_USER); 466 GFP_USER);
465 if (!prog_adj) 467 if (!prog_adj)
466 return NULL; 468 return ERR_PTR(-ENOMEM);
467 469
468 prog_adj->len = insn_adj_cnt; 470 prog_adj->len = insn_adj_cnt;
469 471
@@ -1096,13 +1098,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
1096 continue; 1098 continue;
1097 1099
1098 tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); 1100 tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
1099 if (!tmp) { 1101 if (IS_ERR(tmp)) {
1100 /* Patching may have repointed aux->prog during 1102 /* Patching may have repointed aux->prog during
1101 * realloc from the original one, so we need to 1103 * realloc from the original one, so we need to
1102 * fix it up here on error. 1104 * fix it up here on error.
1103 */ 1105 */
1104 bpf_jit_prog_release_other(prog, clone); 1106 bpf_jit_prog_release_other(prog, clone);
1105 return ERR_PTR(-ENOMEM); 1107 return tmp;
1106 } 1108 }
1107 1109
1108 clone = tmp; 1110 clone = tmp;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index de73f55e42fd..d9ce383c0f9c 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -205,10 +205,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
205 * part of the ldimm64 insn is accessible. 205 * part of the ldimm64 insn is accessible.
206 */ 206 */
207 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; 207 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
208 bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; 208 bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
209 insn->src_reg == BPF_PSEUDO_MAP_VALUE;
209 char tmp[64]; 210 char tmp[64];
210 211
211 if (map_ptr && !allow_ptr_leaks) 212 if (is_ptr && !allow_ptr_leaks)
212 imm = 0; 213 imm = 0;
213 214
214 verbose(cbs->private_data, "(%02x) r%d = %s\n", 215 verbose(cbs->private_data, "(%02x) r%d = %s\n",
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fed15cf94dca..192d32e77db3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -23,7 +23,7 @@
23 23
24#define HTAB_CREATE_FLAG_MASK \ 24#define HTAB_CREATE_FLAG_MASK \
25 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ 25 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
26 BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED) 26 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
27 27
28struct bucket { 28struct bucket {
29 struct hlist_nulls_head head; 29 struct hlist_nulls_head head;
@@ -262,8 +262,8 @@ static int htab_map_alloc_check(union bpf_attr *attr)
262 /* Guard against local DoS, and discourage production use. */ 262 /* Guard against local DoS, and discourage production use. */
263 return -EPERM; 263 return -EPERM;
264 264
265 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK) 265 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
266 /* reserved bits should not be used */ 266 !bpf_map_flags_access_ok(attr->map_flags))
267 return -EINVAL; 267 return -EINVAL;
268 268
269 if (!lru && percpu_lru) 269 if (!lru && percpu_lru)
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 6b572e2de7fb..980e8f1f6cb5 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -14,7 +14,7 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO
14#ifdef CONFIG_CGROUP_BPF 14#ifdef CONFIG_CGROUP_BPF
15 15
16#define LOCAL_STORAGE_CREATE_FLAG_MASK \ 16#define LOCAL_STORAGE_CREATE_FLAG_MASK \
17 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 17 (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
18 18
19struct bpf_cgroup_storage_map { 19struct bpf_cgroup_storage_map {
20 struct bpf_map map; 20 struct bpf_map map;
@@ -282,8 +282,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
282 if (attr->value_size > PAGE_SIZE) 282 if (attr->value_size > PAGE_SIZE)
283 return ERR_PTR(-E2BIG); 283 return ERR_PTR(-E2BIG);
284 284
285 if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK) 285 if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
286 /* reserved bits should not be used */ 286 !bpf_map_flags_access_ok(attr->map_flags))
287 return ERR_PTR(-EINVAL); 287 return ERR_PTR(-EINVAL);
288 288
289 if (attr->max_entries) 289 if (attr->max_entries)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 93a5cbbde421..e61630c2e50b 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -538,7 +538,7 @@ out:
538#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) 538#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
539 539
540#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ 540#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \
541 BPF_F_RDONLY | BPF_F_WRONLY) 541 BPF_F_ACCESS_MASK)
542 542
543static struct bpf_map *trie_alloc(union bpf_attr *attr) 543static struct bpf_map *trie_alloc(union bpf_attr *attr)
544{ 544{
@@ -553,6 +553,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
553 if (attr->max_entries == 0 || 553 if (attr->max_entries == 0 ||
554 !(attr->map_flags & BPF_F_NO_PREALLOC) || 554 !(attr->map_flags & BPF_F_NO_PREALLOC) ||
555 attr->map_flags & ~LPM_CREATE_FLAG_MASK || 555 attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
556 !bpf_map_flags_access_ok(attr->map_flags) ||
556 attr->key_size < LPM_KEY_SIZE_MIN || 557 attr->key_size < LPM_KEY_SIZE_MIN ||
557 attr->key_size > LPM_KEY_SIZE_MAX || 558 attr->key_size > LPM_KEY_SIZE_MAX ||
558 attr->value_size < LPM_VAL_SIZE_MIN || 559 attr->value_size < LPM_VAL_SIZE_MIN ||
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index b384ea9f3254..0b140d236889 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -11,8 +11,7 @@
11#include "percpu_freelist.h" 11#include "percpu_freelist.h"
12 12
13#define QUEUE_STACK_CREATE_FLAG_MASK \ 13#define QUEUE_STACK_CREATE_FLAG_MASK \
14 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 14 (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
15
16 15
17struct bpf_queue_stack { 16struct bpf_queue_stack {
18 struct bpf_map map; 17 struct bpf_map map;
@@ -52,7 +51,8 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
52 /* check sanity of attributes */ 51 /* check sanity of attributes */
53 if (attr->max_entries == 0 || attr->key_size != 0 || 52 if (attr->max_entries == 0 || attr->key_size != 0 ||
54 attr->value_size == 0 || 53 attr->value_size == 0 ||
55 attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK) 54 attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
55 !bpf_map_flags_access_ok(attr->map_flags))
56 return -EINVAL; 56 return -EINVAL;
57 57
58 if (attr->value_size > KMALLOC_MAX_SIZE) 58 if (attr->value_size > KMALLOC_MAX_SIZE)
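The flag-mask changes above all funnel into bpf_map_flags_access_ok() so that BPF_F_RDONLY_PROG/BPF_F_WRONLY_PROG are accepted per map type. A hedged user-space sketch of creating an array map that programs may only read (enforced later by check_map_access_type() in verifier.c), using the raw syscall:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int create_prog_rdonly_array(unsigned int value_size)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = 4;
	attr.value_size  = value_size;
	attr.max_entries = 1;
	attr.map_flags   = BPF_F_RDONLY_PROG;	/* writable via syscall, read-only for programs */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}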
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index afca36f53c49..d995eedfdd16 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -166,13 +166,25 @@ void bpf_map_area_free(void *area)
166 kvfree(area); 166 kvfree(area);
167} 167}
168 168
169static u32 bpf_map_flags_retain_permanent(u32 flags)
170{
171 /* Some map creation flags are not tied to the map object but
172 * rather to the map fd instead, so they have no meaning upon
173 * map object inspection since multiple file descriptors with
174 * different (access) properties can exist here. Thus, given
175 * this has zero meaning for the map itself, lets clear these
176 * from here.
177 */
178 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
179}
180
169void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 181void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
170{ 182{
171 map->map_type = attr->map_type; 183 map->map_type = attr->map_type;
172 map->key_size = attr->key_size; 184 map->key_size = attr->key_size;
173 map->value_size = attr->value_size; 185 map->value_size = attr->value_size;
174 map->max_entries = attr->max_entries; 186 map->max_entries = attr->max_entries;
175 map->map_flags = attr->map_flags; 187 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
176 map->numa_node = bpf_map_attr_numa_node(attr); 188 map->numa_node = bpf_map_attr_numa_node(attr);
177} 189}
178 190
@@ -343,6 +355,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
343 return 0; 355 return 0;
344} 356}
345 357
358static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
359{
360 fmode_t mode = f.file->f_mode;
361
362 /* Our file permissions may have been overridden by global
363 * map permissions facing syscall side.
364 */
365 if (READ_ONCE(map->frozen))
366 mode &= ~FMODE_CAN_WRITE;
367 return mode;
368}
369
346#ifdef CONFIG_PROC_FS 370#ifdef CONFIG_PROC_FS
347static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 371static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
348{ 372{
@@ -364,14 +388,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
364 "max_entries:\t%u\n" 388 "max_entries:\t%u\n"
365 "map_flags:\t%#x\n" 389 "map_flags:\t%#x\n"
366 "memlock:\t%llu\n" 390 "memlock:\t%llu\n"
367 "map_id:\t%u\n", 391 "map_id:\t%u\n"
392 "frozen:\t%u\n",
368 map->map_type, 393 map->map_type,
369 map->key_size, 394 map->key_size,
370 map->value_size, 395 map->value_size,
371 map->max_entries, 396 map->max_entries,
372 map->map_flags, 397 map->map_flags,
373 map->pages * 1ULL << PAGE_SHIFT, 398 map->pages * 1ULL << PAGE_SHIFT,
374 map->id); 399 map->id,
400 READ_ONCE(map->frozen));
375 401
376 if (owner_prog_type) { 402 if (owner_prog_type) {
377 seq_printf(m, "owner_prog_type:\t%u\n", 403 seq_printf(m, "owner_prog_type:\t%u\n",
@@ -448,10 +474,10 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
448 const char *end = src + BPF_OBJ_NAME_LEN; 474 const char *end = src + BPF_OBJ_NAME_LEN;
449 475
450 memset(dst, 0, BPF_OBJ_NAME_LEN); 476 memset(dst, 0, BPF_OBJ_NAME_LEN);
451 477 /* Copy all isalnum(), '_' and '.' chars. */
452 /* Copy all isalnum() and '_' char */
453 while (src < end && *src) { 478 while (src < end && *src) {
454 if (!isalnum(*src) && *src != '_') 479 if (!isalnum(*src) &&
480 *src != '_' && *src != '.')
455 return -EINVAL; 481 return -EINVAL;
456 *dst++ = *src++; 482 *dst++ = *src++;
457 } 483 }
@@ -478,9 +504,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
478 u32 key_size, value_size; 504 u32 key_size, value_size;
479 int ret = 0; 505 int ret = 0;
480 506
481 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 507 /* Some maps allow key to be unspecified. */
482 if (!key_type || key_size != map->key_size) 508 if (btf_key_id) {
483 return -EINVAL; 509 key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
510 if (!key_type || key_size != map->key_size)
511 return -EINVAL;
512 } else {
513 key_type = btf_type_by_id(btf, 0);
514 if (!map->ops->map_check_btf)
515 return -EINVAL;
516 }
484 517
485 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 518 value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
486 if (!value_type || value_size != map->value_size) 519 if (!value_type || value_size != map->value_size)
@@ -489,6 +522,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
489 map->spin_lock_off = btf_find_spin_lock(btf, value_type); 522 map->spin_lock_off = btf_find_spin_lock(btf, value_type);
490 523
491 if (map_value_has_spin_lock(map)) { 524 if (map_value_has_spin_lock(map)) {
525 if (map->map_flags & BPF_F_RDONLY_PROG)
526 return -EACCES;
492 if (map->map_type != BPF_MAP_TYPE_HASH && 527 if (map->map_type != BPF_MAP_TYPE_HASH &&
493 map->map_type != BPF_MAP_TYPE_ARRAY && 528 map->map_type != BPF_MAP_TYPE_ARRAY &&
494 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) 529 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
@@ -545,7 +580,7 @@ static int map_create(union bpf_attr *attr)
545 if (attr->btf_key_type_id || attr->btf_value_type_id) { 580 if (attr->btf_key_type_id || attr->btf_value_type_id) {
546 struct btf *btf; 581 struct btf *btf;
547 582
548 if (!attr->btf_key_type_id || !attr->btf_value_type_id) { 583 if (!attr->btf_value_type_id) {
549 err = -EINVAL; 584 err = -EINVAL;
550 goto free_map_nouncharge; 585 goto free_map_nouncharge;
551 } 586 }
@@ -713,8 +748,7 @@ static int map_lookup_elem(union bpf_attr *attr)
713 map = __bpf_map_get(f); 748 map = __bpf_map_get(f);
714 if (IS_ERR(map)) 749 if (IS_ERR(map))
715 return PTR_ERR(map); 750 return PTR_ERR(map);
716 751 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
717 if (!(f.file->f_mode & FMODE_CAN_READ)) {
718 err = -EPERM; 752 err = -EPERM;
719 goto err_put; 753 goto err_put;
720 } 754 }
@@ -843,8 +877,7 @@ static int map_update_elem(union bpf_attr *attr)
843 map = __bpf_map_get(f); 877 map = __bpf_map_get(f);
844 if (IS_ERR(map)) 878 if (IS_ERR(map))
845 return PTR_ERR(map); 879 return PTR_ERR(map);
846 880 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
847 if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
848 err = -EPERM; 881 err = -EPERM;
849 goto err_put; 882 goto err_put;
850 } 883 }
@@ -955,8 +988,7 @@ static int map_delete_elem(union bpf_attr *attr)
955 map = __bpf_map_get(f); 988 map = __bpf_map_get(f);
956 if (IS_ERR(map)) 989 if (IS_ERR(map))
957 return PTR_ERR(map); 990 return PTR_ERR(map);
958 991 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
959 if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
960 err = -EPERM; 992 err = -EPERM;
961 goto err_put; 993 goto err_put;
962 } 994 }
@@ -1007,8 +1039,7 @@ static int map_get_next_key(union bpf_attr *attr)
1007 map = __bpf_map_get(f); 1039 map = __bpf_map_get(f);
1008 if (IS_ERR(map)) 1040 if (IS_ERR(map))
1009 return PTR_ERR(map); 1041 return PTR_ERR(map);
1010 1042 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1011 if (!(f.file->f_mode & FMODE_CAN_READ)) {
1012 err = -EPERM; 1043 err = -EPERM;
1013 goto err_put; 1044 goto err_put;
1014 } 1045 }
@@ -1075,8 +1106,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
1075 map = __bpf_map_get(f); 1106 map = __bpf_map_get(f);
1076 if (IS_ERR(map)) 1107 if (IS_ERR(map))
1077 return PTR_ERR(map); 1108 return PTR_ERR(map);
1078 1109 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1079 if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
1080 err = -EPERM; 1110 err = -EPERM;
1081 goto err_put; 1111 goto err_put;
1082 } 1112 }
@@ -1118,6 +1148,36 @@ err_put:
1118 return err; 1148 return err;
1119} 1149}
1120 1150
1151#define BPF_MAP_FREEZE_LAST_FIELD map_fd
1152
1153static int map_freeze(const union bpf_attr *attr)
1154{
1155 int err = 0, ufd = attr->map_fd;
1156 struct bpf_map *map;
1157 struct fd f;
1158
1159 if (CHECK_ATTR(BPF_MAP_FREEZE))
1160 return -EINVAL;
1161
1162 f = fdget(ufd);
1163 map = __bpf_map_get(f);
1164 if (IS_ERR(map))
1165 return PTR_ERR(map);
1166 if (READ_ONCE(map->frozen)) {
1167 err = -EBUSY;
1168 goto err_put;
1169 }
1170 if (!capable(CAP_SYS_ADMIN)) {
1171 err = -EPERM;
1172 goto err_put;
1173 }
1174
1175 WRITE_ONCE(map->frozen, true);
1176err_put:
1177 fdput(f);
1178 return err;
1179}
1180
1121static const struct bpf_prog_ops * const bpf_prog_types[] = { 1181static const struct bpf_prog_ops * const bpf_prog_types[] = {
1122#define BPF_PROG_TYPE(_id, _name) \ 1182#define BPF_PROG_TYPE(_id, _name) \
1123 [_id] = & _name ## _prog_ops, 1183 [_id] = & _name ## _prog_ops,
@@ -1557,7 +1617,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
1557 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 1617 /* eBPF programs must be GPL compatible to use GPL-ed functions */
1558 is_gpl = license_is_gpl_compatible(license); 1618 is_gpl = license_is_gpl_compatible(license);
1559 1619
1560 if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) 1620 if (attr->insn_cnt == 0 ||
1621 attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
1561 return -E2BIG; 1622 return -E2BIG;
1562 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 1623 if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
1563 type != BPF_PROG_TYPE_CGROUP_SKB && 1624 type != BPF_PROG_TYPE_CGROUP_SKB &&
@@ -1948,7 +2009,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
1948 return cgroup_bpf_prog_query(attr, uattr); 2009 return cgroup_bpf_prog_query(attr, uattr);
1949} 2010}
1950 2011
1951#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 2012#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
1952 2013
1953static int bpf_prog_test_run(const union bpf_attr *attr, 2014static int bpf_prog_test_run(const union bpf_attr *attr,
1954 union bpf_attr __user *uattr) 2015 union bpf_attr __user *uattr)
@@ -1961,6 +2022,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
1961 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 2022 if (CHECK_ATTR(BPF_PROG_TEST_RUN))
1962 return -EINVAL; 2023 return -EINVAL;
1963 2024
2025 if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
2026 (!attr->test.ctx_size_in && attr->test.ctx_in))
2027 return -EINVAL;
2028
2029 if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
2030 (!attr->test.ctx_size_out && attr->test.ctx_out))
2031 return -EINVAL;
2032
1964 prog = bpf_prog_get(attr->test.prog_fd); 2033 prog = bpf_prog_get(attr->test.prog_fd);
1965 if (IS_ERR(prog)) 2034 if (IS_ERR(prog))
1966 return PTR_ERR(prog); 2035 return PTR_ERR(prog);
@@ -2071,13 +2140,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
2071} 2140}
2072 2141
2073static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 2142static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
2074 unsigned long addr) 2143 unsigned long addr, u32 *off,
2144 u32 *type)
2075{ 2145{
2146 const struct bpf_map *map;
2076 int i; 2147 int i;
2077 2148
2078 for (i = 0; i < prog->aux->used_map_cnt; i++) 2149 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
2079 if (prog->aux->used_maps[i] == (void *)addr) 2150 map = prog->aux->used_maps[i];
2080 return prog->aux->used_maps[i]; 2151 if (map == (void *)addr) {
2152 *type = BPF_PSEUDO_MAP_FD;
2153 return map;
2154 }
2155 if (!map->ops->map_direct_value_meta)
2156 continue;
2157 if (!map->ops->map_direct_value_meta(map, addr, off)) {
2158 *type = BPF_PSEUDO_MAP_VALUE;
2159 return map;
2160 }
2161 }
2162
2081 return NULL; 2163 return NULL;
2082} 2164}
2083 2165
@@ -2085,6 +2167,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
2085{ 2167{
2086 const struct bpf_map *map; 2168 const struct bpf_map *map;
2087 struct bpf_insn *insns; 2169 struct bpf_insn *insns;
2170 u32 off, type;
2088 u64 imm; 2171 u64 imm;
2089 int i; 2172 int i;
2090 2173
@@ -2112,11 +2195,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
2112 continue; 2195 continue;
2113 2196
2114 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 2197 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
2115 map = bpf_map_from_imm(prog, imm); 2198 map = bpf_map_from_imm(prog, imm, &off, &type);
2116 if (map) { 2199 if (map) {
2117 insns[i].src_reg = BPF_PSEUDO_MAP_FD; 2200 insns[i].src_reg = type;
2118 insns[i].imm = map->id; 2201 insns[i].imm = map->id;
2119 insns[i + 1].imm = 0; 2202 insns[i + 1].imm = off;
2120 continue; 2203 continue;
2121 } 2204 }
2122 } 2205 }
@@ -2706,6 +2789,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
2706 case BPF_MAP_GET_NEXT_KEY: 2789 case BPF_MAP_GET_NEXT_KEY:
2707 err = map_get_next_key(&attr); 2790 err = map_get_next_key(&attr);
2708 break; 2791 break;
2792 case BPF_MAP_FREEZE:
2793 err = map_freeze(&attr);
2794 break;
2709 case BPF_PROG_LOAD: 2795 case BPF_PROG_LOAD:
2710 err = bpf_prog_load(&attr, uattr); 2796 err = bpf_prog_load(&attr, uattr);
2711 break; 2797 break;
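The new BPF_MAP_FREEZE command above pairs with map_get_sys_perms(): once a map is frozen, syscall-side writes fail with -EPERM while program-side access is unaffected, which is how e.g. a .rodata map can be made immutable after setup. A minimal usage sketch against the raw syscall, assuming CAP_SYS_ADMIN as required by map_freeze():

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int map_freeze_fd(int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;	/* BPF_MAP_FREEZE only looks at map_fd */

	return syscall(__NR_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr));
}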
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b7ad8003c4e6..f25b7c9c20ba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,6 @@ struct bpf_verifier_stack_elem {
176 struct bpf_verifier_stack_elem *next; 176 struct bpf_verifier_stack_elem *next;
177}; 177};
178 178
179#define BPF_COMPLEXITY_LIMIT_INSNS 131072
180#define BPF_COMPLEXITY_LIMIT_STACK 1024 179#define BPF_COMPLEXITY_LIMIT_STACK 1024
181#define BPF_COMPLEXITY_LIMIT_STATES 64 180#define BPF_COMPLEXITY_LIMIT_STATES 64
182 181
@@ -1092,7 +1091,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
1092 */ 1091 */
1093 subprog[env->subprog_cnt].start = insn_cnt; 1092 subprog[env->subprog_cnt].start = insn_cnt;
1094 1093
1095 if (env->log.level > 1) 1094 if (env->log.level & BPF_LOG_LEVEL2)
1096 for (i = 0; i < env->subprog_cnt; i++) 1095 for (i = 0; i < env->subprog_cnt; i++)
1097 verbose(env, "func#%d @%d\n", i, subprog[i].start); 1096 verbose(env, "func#%d @%d\n", i, subprog[i].start);
1098 1097
@@ -1139,6 +1138,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
1139 struct bpf_reg_state *parent) 1138 struct bpf_reg_state *parent)
1140{ 1139{
1141 bool writes = parent == state->parent; /* Observe write marks */ 1140 bool writes = parent == state->parent; /* Observe write marks */
1141 int cnt = 0;
1142 1142
1143 while (parent) { 1143 while (parent) {
1144 /* if read wasn't screened by an earlier write ... */ 1144 /* if read wasn't screened by an earlier write ... */
@@ -1150,12 +1150,25 @@ static int mark_reg_read(struct bpf_verifier_env *env,
1150 parent->var_off.value, parent->off); 1150 parent->var_off.value, parent->off);
1151 return -EFAULT; 1151 return -EFAULT;
1152 } 1152 }
1153 if (parent->live & REG_LIVE_READ)
1154 /* The parentage chain never changes and
1155 * this parent was already marked as LIVE_READ.
1156 * There is no need to keep walking the chain again and
1157 * keep re-marking all parents as LIVE_READ.
1158 * This case happens when the same register is read
1159 * multiple times without writes into it in-between.
1160 */
1161 break;
1153 /* ... then we depend on parent's value */ 1162 /* ... then we depend on parent's value */
1154 parent->live |= REG_LIVE_READ; 1163 parent->live |= REG_LIVE_READ;
1155 state = parent; 1164 state = parent;
1156 parent = state->parent; 1165 parent = state->parent;
1157 writes = true; 1166 writes = true;
1167 cnt++;
1158 } 1168 }
1169
1170 if (env->longest_mark_read_walk < cnt)
1171 env->longest_mark_read_walk = cnt;
1159 return 0; 1172 return 0;
1160} 1173}
1161 1174
@@ -1413,7 +1426,7 @@ static int check_stack_access(struct bpf_verifier_env *env,
1413 char tn_buf[48]; 1426 char tn_buf[48];
1414 1427
1415 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 1428 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1416 verbose(env, "variable stack access var_off=%s off=%d size=%d", 1429 verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
1417 tn_buf, off, size); 1430 tn_buf, off, size);
1418 return -EACCES; 1431 return -EACCES;
1419 } 1432 }
@@ -1426,6 +1439,28 @@ static int check_stack_access(struct bpf_verifier_env *env,
1426 return 0; 1439 return 0;
1427} 1440}
1428 1441
1442static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
1443 int off, int size, enum bpf_access_type type)
1444{
1445 struct bpf_reg_state *regs = cur_regs(env);
1446 struct bpf_map *map = regs[regno].map_ptr;
1447 u32 cap = bpf_map_flags_to_cap(map);
1448
1449 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
1450 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
1451 map->value_size, off, size);
1452 return -EACCES;
1453 }
1454
1455 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
1456 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
1457 map->value_size, off, size);
1458 return -EACCES;
1459 }
1460
1461 return 0;
1462}
1463
1429/* check read/write into map element returned by bpf_map_lookup_elem() */ 1464/* check read/write into map element returned by bpf_map_lookup_elem() */
1430static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, 1465static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1431 int size, bool zero_size_allowed) 1466 int size, bool zero_size_allowed)
@@ -1455,7 +1490,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1455 * need to try adding each of min_value and max_value to off 1490 * need to try adding each of min_value and max_value to off
1456 * to make sure our theoretical access will be safe. 1491 * to make sure our theoretical access will be safe.
1457 */ 1492 */
1458 if (env->log.level) 1493 if (env->log.level & BPF_LOG_LEVEL)
1459 print_verifier_state(env, state); 1494 print_verifier_state(env, state);
1460 1495
1461 /* The minimum value is only important with signed 1496 /* The minimum value is only important with signed
@@ -2012,7 +2047,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
2012 verbose(env, "R%d leaks addr into map\n", value_regno); 2047 verbose(env, "R%d leaks addr into map\n", value_regno);
2013 return -EACCES; 2048 return -EACCES;
2014 } 2049 }
2015 2050 err = check_map_access_type(env, regno, off, size, t);
2051 if (err)
2052 return err;
2016 err = check_map_access(env, regno, off, size, false); 2053 err = check_map_access(env, regno, off, size, false);
2017 if (!err && t == BPF_READ && value_regno >= 0) 2054 if (!err && t == BPF_READ && value_regno >= 0)
2018 mark_reg_unknown(env, regs, value_regno); 2055 mark_reg_unknown(env, regs, value_regno);
@@ -2158,6 +2195,29 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
2158 BPF_SIZE(insn->code), BPF_WRITE, -1, true); 2195 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
2159} 2196}
2160 2197
2198static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
2199 int off, int access_size,
2200 bool zero_size_allowed)
2201{
2202 struct bpf_reg_state *reg = reg_state(env, regno);
2203
2204 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
2205 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
2206 if (tnum_is_const(reg->var_off)) {
2207 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
2208 regno, off, access_size);
2209 } else {
2210 char tn_buf[48];
2211
2212 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2213 verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
2214 regno, tn_buf, access_size);
2215 }
2216 return -EACCES;
2217 }
2218 return 0;
2219}
2220
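As a worked example of the reject conditions above: off = -16 with access_size = 8 passes, since off is negative, stays within -MAX_BPF_STACK (-512), and off + access_size = -8 does not cross the top of the frame; off = -4 with access_size = 8 is refused because off + access_size = 4 would run past the top of the stack frame.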
2161/* when register 'regno' is passed into function that will read 'access_size' 2221/* when register 'regno' is passed into function that will read 'access_size'
2162 * bytes from that pointer, make sure that it's within stack boundary 2222 * bytes from that pointer, make sure that it's within stack boundary
2163 * and all elements of stack are initialized. 2223 * and all elements of stack are initialized.
@@ -2170,7 +2230,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2170{ 2230{
2171 struct bpf_reg_state *reg = reg_state(env, regno); 2231 struct bpf_reg_state *reg = reg_state(env, regno);
2172 struct bpf_func_state *state = func(env, reg); 2232 struct bpf_func_state *state = func(env, reg);
2173 int off, i, slot, spi; 2233 int err, min_off, max_off, i, slot, spi;
2174 2234
2175 if (reg->type != PTR_TO_STACK) { 2235 if (reg->type != PTR_TO_STACK) {
2176 /* Allow zero-byte read from NULL, regardless of pointer type */ 2236 /* Allow zero-byte read from NULL, regardless of pointer type */
@@ -2184,21 +2244,57 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2184 return -EACCES; 2244 return -EACCES;
2185 } 2245 }
2186 2246
2187 /* Only allow fixed-offset stack reads */ 2247 if (tnum_is_const(reg->var_off)) {
2188 if (!tnum_is_const(reg->var_off)) { 2248 min_off = max_off = reg->var_off.value + reg->off;
2189 char tn_buf[48]; 2249 err = __check_stack_boundary(env, regno, min_off, access_size,
2250 zero_size_allowed);
2251 if (err)
2252 return err;
2253 } else {
2254 /* Variable offset is prohibited for unprivileged mode for
2255 * simplicity since it requires corresponding support in
2256 * Spectre masking for stack ALU.
2257 * See also retrieve_ptr_limit().
2258 */
2259 if (!env->allow_ptr_leaks) {
2260 char tn_buf[48];
2190 2261
2191 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2262 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2192 verbose(env, "invalid variable stack read R%d var_off=%s\n", 2263 verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
2193 regno, tn_buf); 2264 regno, tn_buf);
2194 return -EACCES; 2265 return -EACCES;
2195 } 2266 }
2196 off = reg->off + reg->var_off.value; 2267 /* Only an initialized buffer on the stack may be accessed
2197 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 2268 * with a variable offset. With an uninitialized buffer it is
2198 access_size < 0 || (access_size == 0 && !zero_size_allowed)) { 2269 * hard to guarantee that the whole memory is marked as
2199 verbose(env, "invalid stack type R%d off=%d access_size=%d\n", 2270 * initialized on helper return, since the exact bounds are
2200 regno, off, access_size); 2271 * unknown, which may leak uninitialized stack data.
2201 return -EACCES; 2272 */
2273 if (meta && meta->raw_mode)
2274 meta = NULL;
2275
2276 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
2277 reg->smax_value <= -BPF_MAX_VAR_OFF) {
2278 verbose(env, "R%d unbounded indirect variable offset stack access\n",
2279 regno);
2280 return -EACCES;
2281 }
2282 min_off = reg->smin_value + reg->off;
2283 max_off = reg->smax_value + reg->off;
2284 err = __check_stack_boundary(env, regno, min_off, access_size,
2285 zero_size_allowed);
2286 if (err) {
2287 verbose(env, "R%d min value is outside of stack bound\n",
2288 regno);
2289 return err;
2290 }
2291 err = __check_stack_boundary(env, regno, max_off, access_size,
2292 zero_size_allowed);
2293 if (err) {
2294 verbose(env, "R%d max value is outside of stack bound\n",
2295 regno);
2296 return err;
2297 }
2202 } 2298 }
2203 2299
2204 if (meta && meta->raw_mode) { 2300 if (meta && meta->raw_mode) {
@@ -2207,10 +2303,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2207 return 0; 2303 return 0;
2208 } 2304 }
2209 2305
2210 for (i = 0; i < access_size; i++) { 2306 for (i = min_off; i < max_off + access_size; i++) {
2211 u8 *stype; 2307 u8 *stype;
2212 2308
2213 slot = -(off + i) - 1; 2309 slot = -i - 1;
2214 spi = slot / BPF_REG_SIZE; 2310 spi = slot / BPF_REG_SIZE;
2215 if (state->allocated_stack <= slot) 2311 if (state->allocated_stack <= slot)
2216 goto err; 2312 goto err;
@@ -2223,8 +2319,16 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2223 goto mark; 2319 goto mark;
2224 } 2320 }
2225err: 2321err:
2226 verbose(env, "invalid indirect read from stack off %d+%d size %d\n", 2322 if (tnum_is_const(reg->var_off)) {
2227 off, i, access_size); 2323 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2324 min_off, i - min_off, access_size);
2325 } else {
2326 char tn_buf[48];
2327
2328 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2329 verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
2330 tn_buf, i - min_off, access_size);
2331 }
2228 return -EACCES; 2332 return -EACCES;
2229mark: 2333mark:
2230 /* reading any byte out of 8-byte 'spill_slot' will cause 2334 /* reading any byte out of 8-byte 'spill_slot' will cause
@@ -2233,7 +2337,7 @@ mark:
2233 mark_reg_read(env, &state->stack[spi].spilled_ptr, 2337 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2234 state->stack[spi].spilled_ptr.parent); 2338 state->stack[spi].spilled_ptr.parent);
2235 } 2339 }
2236 return update_stack_depth(env, state, off); 2340 return update_stack_depth(env, state, min_off);
2237} 2341}
2238 2342
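The practical effect for program authors: a helper argument may now point into the stack at an offset the verifier only knows as a bounded range, provided the buffer is fully initialized and the program runs privileged. A minimal restricted-C sketch, assuming the selftests-style bpf_helpers.h for SEC() and the helper declarations:

    #include <linux/ptrace.h>
    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    char _license[] SEC("license") = "GPL";

    SEC("kprobe/do_nanosleep")
    int var_off_stack(struct pt_regs *ctx)
    {
            char buf[64] = {};                        /* fully initialized stack buffer */
            __u32 off = bpf_get_prandom_u32() & 0x38; /* verifier sees off in [0, 56] */

            /* stack pointer with a variable offset: both min_off (buf + 0) and
             * max_off (buf + 56) of the 8-byte access stay inside the buffer
             */
            bpf_probe_read(buf + off, 8, ctx);
            return 0;
    }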
2239static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, 2343static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@ -2248,6 +2352,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2248 return check_packet_access(env, regno, reg->off, access_size, 2352 return check_packet_access(env, regno, reg->off, access_size,
2249 zero_size_allowed); 2353 zero_size_allowed);
2250 case PTR_TO_MAP_VALUE: 2354 case PTR_TO_MAP_VALUE:
2355 if (check_map_access_type(env, regno, reg->off, access_size,
2356 meta && meta->raw_mode ? BPF_WRITE :
2357 BPF_READ))
2358 return -EACCES;
2251 return check_map_access(env, regno, reg->off, access_size, 2359 return check_map_access(env, regno, reg->off, access_size,
2252 zero_size_allowed); 2360 zero_size_allowed);
2253 default: /* scalar_value|ptr_to_stack or invalid ptr */ 2361 default: /* scalar_value|ptr_to_stack or invalid ptr */
@@ -2906,7 +3014,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2906 /* and go analyze first insn of the callee */ 3014 /* and go analyze first insn of the callee */
2907 *insn_idx = target_insn; 3015 *insn_idx = target_insn;
2908 3016
2909 if (env->log.level) { 3017 if (env->log.level & BPF_LOG_LEVEL) {
2910 verbose(env, "caller:\n"); 3018 verbose(env, "caller:\n");
2911 print_verifier_state(env, caller); 3019 print_verifier_state(env, caller);
2912 verbose(env, "callee:\n"); 3020 verbose(env, "callee:\n");
@@ -2946,7 +3054,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2946 return err; 3054 return err;
2947 3055
2948 *insn_idx = callee->callsite + 1; 3056 *insn_idx = callee->callsite + 1;
2949 if (env->log.level) { 3057 if (env->log.level & BPF_LOG_LEVEL) {
2950 verbose(env, "returning from callee:\n"); 3058 verbose(env, "returning from callee:\n");
2951 print_verifier_state(env, callee); 3059 print_verifier_state(env, callee);
2952 verbose(env, "to caller at %d:\n", *insn_idx); 3060 verbose(env, "to caller at %d:\n", *insn_idx);
@@ -2980,6 +3088,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2980 int func_id, int insn_idx) 3088 int func_id, int insn_idx)
2981{ 3089{
2982 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 3090 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
3091 struct bpf_map *map = meta->map_ptr;
2983 3092
2984 if (func_id != BPF_FUNC_tail_call && 3093 if (func_id != BPF_FUNC_tail_call &&
2985 func_id != BPF_FUNC_map_lookup_elem && 3094 func_id != BPF_FUNC_map_lookup_elem &&
@@ -2990,11 +3099,24 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2990 func_id != BPF_FUNC_map_peek_elem) 3099 func_id != BPF_FUNC_map_peek_elem)
2991 return 0; 3100 return 0;
2992 3101
2993 if (meta->map_ptr == NULL) { 3102 if (map == NULL) {
2994 verbose(env, "kernel subsystem misconfigured verifier\n"); 3103 verbose(env, "kernel subsystem misconfigured verifier\n");
2995 return -EINVAL; 3104 return -EINVAL;
2996 } 3105 }
2997 3106
3107 /* In case of read-only, some additional restrictions
3108 * need to be applied in order to prevent altering the
3109 * state of the map from the program side.
3110 */
3111 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
3112 (func_id == BPF_FUNC_map_delete_elem ||
3113 func_id == BPF_FUNC_map_update_elem ||
3114 func_id == BPF_FUNC_map_push_elem ||
3115 func_id == BPF_FUNC_map_pop_elem)) {
3116 verbose(env, "write into map forbidden\n");
3117 return -EACCES;
3118 }
3119
2998 if (!BPF_MAP_PTR(aux->map_state)) 3120 if (!BPF_MAP_PTR(aux->map_state))
2999 bpf_map_ptr_store(aux, meta->map_ptr, 3121 bpf_map_ptr_store(aux, meta->map_ptr,
3000 meta->map_ptr->unpriv_array); 3122 meta->map_ptr->unpriv_array);
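From the loader side, this restriction is driven entirely by the map creation flags added in the series. A hedged user-space sketch creating an array map that programs may read but never modify, using the raw bpf(2) syscall:

    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int create_prog_rdonly_array(void)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.map_type    = BPF_MAP_TYPE_ARRAY;
            attr.key_size    = sizeof(__u32);
            attr.value_size  = 64;
            attr.max_entries = 1;
            attr.map_flags   = BPF_F_RDONLY_PROG;  /* read-only from the program side */

            return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }

Programs calling bpf_map_update_elem(), bpf_map_delete_elem(), bpf_map_push_elem() or bpf_map_pop_elem() on such a map are rejected at load time with the "write into map forbidden" message above; user space can still update the map through the syscall interface.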
@@ -3285,6 +3407,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3285 3407
3286 switch (ptr_reg->type) { 3408 switch (ptr_reg->type) {
3287 case PTR_TO_STACK: 3409 case PTR_TO_STACK:
3410 /* Indirect variable offset stack access is prohibited in
3411 * unprivileged mode so it's not handled here.
3412 */
3288 off = ptr_reg->off + ptr_reg->var_off.value; 3413 off = ptr_reg->off + ptr_reg->var_off.value;
3289 if (mask_to_left) 3414 if (mask_to_left)
3290 *ptr_limit = MAX_BPF_STACK + off; 3415 *ptr_limit = MAX_BPF_STACK + off;
@@ -4969,23 +5094,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
4969 insn->dst_reg); 5094 insn->dst_reg);
4970 return -EACCES; 5095 return -EACCES;
4971 } 5096 }
4972 if (env->log.level) 5097 if (env->log.level & BPF_LOG_LEVEL)
4973 print_verifier_state(env, this_branch->frame[this_branch->curframe]); 5098 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
4974 return 0; 5099 return 0;
4975} 5100}
4976 5101
4977/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4978static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4979{
4980 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4981
4982 return (struct bpf_map *) (unsigned long) imm64;
4983}
4984
4985/* verify BPF_LD_IMM64 instruction */ 5102/* verify BPF_LD_IMM64 instruction */
4986static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) 5103static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
4987{ 5104{
5105 struct bpf_insn_aux_data *aux = cur_aux(env);
4988 struct bpf_reg_state *regs = cur_regs(env); 5106 struct bpf_reg_state *regs = cur_regs(env);
5107 struct bpf_map *map;
4989 int err; 5108 int err;
4990 5109
4991 if (BPF_SIZE(insn->code) != BPF_DW) { 5110 if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -5009,11 +5128,22 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
5009 return 0; 5128 return 0;
5010 } 5129 }
5011 5130
5012 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ 5131 map = env->used_maps[aux->map_index];
5013 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); 5132 mark_reg_known_zero(env, regs, insn->dst_reg);
5133 regs[insn->dst_reg].map_ptr = map;
5134
5135 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
5136 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
5137 regs[insn->dst_reg].off = aux->map_off;
5138 if (map_value_has_spin_lock(map))
5139 regs[insn->dst_reg].id = ++env->id_gen;
5140 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
5141 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5142 } else {
5143 verbose(env, "bpf verifier is misconfigured\n");
5144 return -EINVAL;
5145 }
5014 5146
5015 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5016 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
5017 return 0; 5147 return 0;
5018} 5148}
5019 5149
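For loaders, the BPF_PSEUDO_MAP_VALUE form mirrors the existing BPF_PSEUDO_MAP_FD convention: insn[0].imm still carries the map fd, while insn[1].imm, which previously had to be zero, now carries the offset into the map value that replace_map_fd_with_map_ptr() resolves later. A hypothetical helper (not part of libbpf or the kernel) sketching the encoding:

    #include <linux/bpf.h>
    #include <string.h>

    static void emit_ld_map_value(struct bpf_insn *insn, int map_fd, __u32 off)
    {
            memset(insn, 0, sizeof(*insn) * 2);       /* ld_imm64 occupies two insns */
            insn[0].code    = BPF_LD | BPF_DW | BPF_IMM;
            insn[0].dst_reg = BPF_REG_1;
            insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;   /* dst becomes PTR_TO_MAP_VALUE */
            insn[0].imm     = map_fd;                 /* map fd, as with BPF_PSEUDO_MAP_FD */
            insn[1].imm     = off;                    /* offset into the map value */
    }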
@@ -5267,13 +5397,13 @@ static int check_cfg(struct bpf_verifier_env *env)
5267 int ret = 0; 5397 int ret = 0;
5268 int i, t; 5398 int i, t;
5269 5399
5270 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 5400 insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5271 if (!insn_state) 5401 if (!insn_state)
5272 return -ENOMEM; 5402 return -ENOMEM;
5273 5403
5274 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 5404 insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5275 if (!insn_stack) { 5405 if (!insn_stack) {
5276 kfree(insn_state); 5406 kvfree(insn_state);
5277 return -ENOMEM; 5407 return -ENOMEM;
5278 } 5408 }
5279 5409
@@ -5371,8 +5501,8 @@ check_state:
5371 ret = 0; /* cfg looks good */ 5501 ret = 0; /* cfg looks good */
5372 5502
5373err_free: 5503err_free:
5374 kfree(insn_state); 5504 kvfree(insn_state);
5375 kfree(insn_stack); 5505 kvfree(insn_stack);
5376 return ret; 5506 return ret;
5377} 5507}
5378 5508
@@ -6115,11 +6245,13 @@ static int propagate_liveness(struct bpf_verifier_env *env,
6115static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 6245static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6116{ 6246{
6117 struct bpf_verifier_state_list *new_sl; 6247 struct bpf_verifier_state_list *new_sl;
6118 struct bpf_verifier_state_list *sl; 6248 struct bpf_verifier_state_list *sl, **pprev;
6119 struct bpf_verifier_state *cur = env->cur_state, *new; 6249 struct bpf_verifier_state *cur = env->cur_state, *new;
6120 int i, j, err, states_cnt = 0; 6250 int i, j, err, states_cnt = 0;
6121 6251
6122 sl = env->explored_states[insn_idx]; 6252 pprev = &env->explored_states[insn_idx];
6253 sl = *pprev;
6254
6123 if (!sl) 6255 if (!sl)
6124 /* this 'insn_idx' instruction wasn't marked, so we will not 6256 /* this 'insn_idx' instruction wasn't marked, so we will not
6125 * be doing state search here 6257 * be doing state search here
@@ -6130,6 +6262,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6130 6262
6131 while (sl != STATE_LIST_MARK) { 6263 while (sl != STATE_LIST_MARK) {
6132 if (states_equal(env, &sl->state, cur)) { 6264 if (states_equal(env, &sl->state, cur)) {
6265 sl->hit_cnt++;
6133 /* reached equivalent register/stack state, 6266 /* reached equivalent register/stack state,
6134 * prune the search. 6267 * prune the search.
6135 * Registers read by the continuation are read by us. 6268 * Registers read by the continuation are read by us.
@@ -6145,10 +6278,40 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6145 return err; 6278 return err;
6146 return 1; 6279 return 1;
6147 } 6280 }
6148 sl = sl->next;
6149 states_cnt++; 6281 states_cnt++;
6282 sl->miss_cnt++;
6283 /* heuristic to determine whether keeping this state around is
6284 * beneficial from the state-equivalence (pruning) point of view.
6285 * Higher numbers increase max_states_per_insn and verification time,
6286 * but do not meaningfully decrease insn_processed.
6287 */
6288 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
6289 /* the state is unlikely to be useful. Remove it to
6290 * speed up verification
6291 */
6292 *pprev = sl->next;
6293 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
6294 free_verifier_state(&sl->state, false);
6295 kfree(sl);
6296 env->peak_states--;
6297 } else {
6298 /* cannot free this state, since parentage chain may
6299 * walk it later. Add it to free_list instead, to
6300 * be freed at the end of verification
6301 */
6302 sl->next = env->free_list;
6303 env->free_list = sl;
6304 }
6305 sl = *pprev;
6306 continue;
6307 }
6308 pprev = &sl->next;
6309 sl = *pprev;
6150 } 6310 }
6151 6311
6312 if (env->max_states_per_insn < states_cnt)
6313 env->max_states_per_insn = states_cnt;
6314
6152 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 6315 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6153 return 0; 6316 return 0;
6154 6317
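To make the heuristic concrete: a stored state that has never pruned anything (hit_cnt == 0) is unlinked on its 4th miss, since 4 > 0 * 3 + 3, while one that has pruned twice survives until its 10th. A state whose frame 0 R0 already carries REG_LIVE_DONE can be freed on the spot; the rest are parked on env->free_list, because a later state may still walk their parentage chain, and are released in free_states().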
@@ -6162,6 +6325,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6162 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 6325 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
6163 if (!new_sl) 6326 if (!new_sl)
6164 return -ENOMEM; 6327 return -ENOMEM;
6328 env->total_states++;
6329 env->peak_states++;
6165 6330
6166 /* add new state to the head of linked list */ 6331 /* add new state to the head of linked list */
6167 new = &new_sl->state; 6332 new = &new_sl->state;
@@ -6246,8 +6411,7 @@ static int do_check(struct bpf_verifier_env *env)
6246 struct bpf_verifier_state *state; 6411 struct bpf_verifier_state *state;
6247 struct bpf_insn *insns = env->prog->insnsi; 6412 struct bpf_insn *insns = env->prog->insnsi;
6248 struct bpf_reg_state *regs; 6413 struct bpf_reg_state *regs;
6249 int insn_cnt = env->prog->len, i; 6414 int insn_cnt = env->prog->len;
6250 int insn_processed = 0;
6251 bool do_print_state = false; 6415 bool do_print_state = false;
6252 6416
6253 env->prev_linfo = NULL; 6417 env->prev_linfo = NULL;
@@ -6282,10 +6446,10 @@ static int do_check(struct bpf_verifier_env *env)
6282 insn = &insns[env->insn_idx]; 6446 insn = &insns[env->insn_idx];
6283 class = BPF_CLASS(insn->code); 6447 class = BPF_CLASS(insn->code);
6284 6448
6285 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 6449 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
6286 verbose(env, 6450 verbose(env,
6287 "BPF program is too large. Processed %d insn\n", 6451 "BPF program is too large. Processed %d insn\n",
6288 insn_processed); 6452 env->insn_processed);
6289 return -E2BIG; 6453 return -E2BIG;
6290 } 6454 }
6291 6455
@@ -6294,7 +6458,7 @@ static int do_check(struct bpf_verifier_env *env)
6294 return err; 6458 return err;
6295 if (err == 1) { 6459 if (err == 1) {
6296 /* found equivalent state, can prune the search */ 6460 /* found equivalent state, can prune the search */
6297 if (env->log.level) { 6461 if (env->log.level & BPF_LOG_LEVEL) {
6298 if (do_print_state) 6462 if (do_print_state)
6299 verbose(env, "\nfrom %d to %d%s: safe\n", 6463 verbose(env, "\nfrom %d to %d%s: safe\n",
6300 env->prev_insn_idx, env->insn_idx, 6464 env->prev_insn_idx, env->insn_idx,
@@ -6312,8 +6476,9 @@ static int do_check(struct bpf_verifier_env *env)
6312 if (need_resched()) 6476 if (need_resched())
6313 cond_resched(); 6477 cond_resched();
6314 6478
6315 if (env->log.level > 1 || (env->log.level && do_print_state)) { 6479 if (env->log.level & BPF_LOG_LEVEL2 ||
6316 if (env->log.level > 1) 6480 (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
6481 if (env->log.level & BPF_LOG_LEVEL2)
6317 verbose(env, "%d:", env->insn_idx); 6482 verbose(env, "%d:", env->insn_idx);
6318 else 6483 else
6319 verbose(env, "\nfrom %d to %d%s:", 6484 verbose(env, "\nfrom %d to %d%s:",
@@ -6324,7 +6489,7 @@ static int do_check(struct bpf_verifier_env *env)
6324 do_print_state = false; 6489 do_print_state = false;
6325 } 6490 }
6326 6491
6327 if (env->log.level) { 6492 if (env->log.level & BPF_LOG_LEVEL) {
6328 const struct bpf_insn_cbs cbs = { 6493 const struct bpf_insn_cbs cbs = {
6329 .cb_print = verbose, 6494 .cb_print = verbose,
6330 .private_data = env, 6495 .private_data = env,
@@ -6589,16 +6754,6 @@ process_bpf_exit:
6589 env->insn_idx++; 6754 env->insn_idx++;
6590 } 6755 }
6591 6756
6592 verbose(env, "processed %d insns (limit %d), stack depth ",
6593 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
6594 for (i = 0; i < env->subprog_cnt; i++) {
6595 u32 depth = env->subprog_info[i].stack_depth;
6596
6597 verbose(env, "%d", depth);
6598 if (i + 1 < env->subprog_cnt)
6599 verbose(env, "+");
6600 }
6601 verbose(env, "\n");
6602 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 6757 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
6603 return 0; 6758 return 0;
6604} 6759}
@@ -6696,8 +6851,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
6696 } 6851 }
6697 6852
6698 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { 6853 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
6854 struct bpf_insn_aux_data *aux;
6699 struct bpf_map *map; 6855 struct bpf_map *map;
6700 struct fd f; 6856 struct fd f;
6857 u64 addr;
6701 6858
6702 if (i == insn_cnt - 1 || insn[1].code != 0 || 6859 if (i == insn_cnt - 1 || insn[1].code != 0 ||
6703 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || 6860 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@ -6706,13 +6863,19 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
6706 return -EINVAL; 6863 return -EINVAL;
6707 } 6864 }
6708 6865
6709 if (insn->src_reg == 0) 6866 if (insn[0].src_reg == 0)
6710 /* valid generic load 64-bit imm */ 6867 /* valid generic load 64-bit imm */
6711 goto next_insn; 6868 goto next_insn;
6712 6869
6713 if (insn[0].src_reg != BPF_PSEUDO_MAP_FD || 6870 /* In final convert_pseudo_ld_imm64() step, this is
6714 insn[1].imm != 0) { 6871 * converted into regular 64-bit imm load insn.
6715 verbose(env, "unrecognized bpf_ld_imm64 insn\n"); 6872 */
6873 if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
6874 insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
6875 (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
6876 insn[1].imm != 0)) {
6877 verbose(env,
6878 "unrecognized bpf_ld_imm64 insn\n");
6716 return -EINVAL; 6879 return -EINVAL;
6717 } 6880 }
6718 6881
@@ -6730,16 +6893,47 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
6730 return err; 6893 return err;
6731 } 6894 }
6732 6895
6733 /* store map pointer inside BPF_LD_IMM64 instruction */ 6896 aux = &env->insn_aux_data[i];
6734 insn[0].imm = (u32) (unsigned long) map; 6897 if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
6735 insn[1].imm = ((u64) (unsigned long) map) >> 32; 6898 addr = (unsigned long)map;
6899 } else {
6900 u32 off = insn[1].imm;
6901
6902 if (off >= BPF_MAX_VAR_OFF) {
6903 verbose(env, "direct value offset of %u is not allowed\n", off);
6904 fdput(f);
6905 return -EINVAL;
6906 }
6907
6908 if (!map->ops->map_direct_value_addr) {
6909 verbose(env, "no direct value access support for this map type\n");
6910 fdput(f);
6911 return -EINVAL;
6912 }
6913
6914 err = map->ops->map_direct_value_addr(map, &addr, off);
6915 if (err) {
6916 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
6917 map->value_size, off);
6918 fdput(f);
6919 return err;
6920 }
6921
6922 aux->map_off = off;
6923 addr += off;
6924 }
6925
6926 insn[0].imm = (u32)addr;
6927 insn[1].imm = addr >> 32;
6736 6928
6737 /* check whether we recorded this map already */ 6929 /* check whether we recorded this map already */
6738 for (j = 0; j < env->used_map_cnt; j++) 6930 for (j = 0; j < env->used_map_cnt; j++) {
6739 if (env->used_maps[j] == map) { 6931 if (env->used_maps[j] == map) {
6932 aux->map_index = j;
6740 fdput(f); 6933 fdput(f);
6741 goto next_insn; 6934 goto next_insn;
6742 } 6935 }
6936 }
6743 6937
6744 if (env->used_map_cnt >= MAX_USED_MAPS) { 6938 if (env->used_map_cnt >= MAX_USED_MAPS) {
6745 fdput(f); 6939 fdput(f);
@@ -6756,6 +6950,8 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
6756 fdput(f); 6950 fdput(f);
6757 return PTR_ERR(map); 6951 return PTR_ERR(map);
6758 } 6952 }
6953
6954 aux->map_index = env->used_map_cnt;
6759 env->used_maps[env->used_map_cnt++] = map; 6955 env->used_maps[env->used_map_cnt++] = map;
6760 6956
6761 if (bpf_map_is_cgroup_storage(map) && 6957 if (bpf_map_is_cgroup_storage(map) &&
@@ -6861,8 +7057,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
6861 struct bpf_prog *new_prog; 7057 struct bpf_prog *new_prog;
6862 7058
6863 new_prog = bpf_patch_insn_single(env->prog, off, patch, len); 7059 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
6864 if (!new_prog) 7060 if (IS_ERR(new_prog)) {
7061 if (PTR_ERR(new_prog) == -ERANGE)
7062 verbose(env,
7063 "insn %d cannot be patched due to 16-bit range\n",
7064 env->insn_aux_data[off].orig_idx);
6865 return NULL; 7065 return NULL;
7066 }
6866 if (adjust_insn_aux_data(env, new_prog->len, off, len)) 7067 if (adjust_insn_aux_data(env, new_prog->len, off, len))
6867 return NULL; 7068 return NULL;
6868 adjust_subprog_starts(env, off, len); 7069 adjust_subprog_starts(env, off, len);
@@ -7804,6 +8005,14 @@ static void free_states(struct bpf_verifier_env *env)
7804 struct bpf_verifier_state_list *sl, *sln; 8005 struct bpf_verifier_state_list *sl, *sln;
7805 int i; 8006 int i;
7806 8007
8008 sl = env->free_list;
8009 while (sl) {
8010 sln = sl->next;
8011 free_verifier_state(&sl->state, false);
8012 kfree(sl);
8013 sl = sln;
8014 }
8015
7807 if (!env->explored_states) 8016 if (!env->explored_states)
7808 return; 8017 return;
7809 8018
@@ -7819,12 +8028,37 @@ static void free_states(struct bpf_verifier_env *env)
7819 } 8028 }
7820 } 8029 }
7821 8030
7822 kfree(env->explored_states); 8031 kvfree(env->explored_states);
8032}
8033
8034static void print_verification_stats(struct bpf_verifier_env *env)
8035{
8036 int i;
8037
8038 if (env->log.level & BPF_LOG_STATS) {
8039 verbose(env, "verification time %lld usec\n",
8040 div_u64(env->verification_time, 1000));
8041 verbose(env, "stack depth ");
8042 for (i = 0; i < env->subprog_cnt; i++) {
8043 u32 depth = env->subprog_info[i].stack_depth;
8044
8045 verbose(env, "%d", depth);
8046 if (i + 1 < env->subprog_cnt)
8047 verbose(env, "+");
8048 }
8049 verbose(env, "\n");
8050 }
8051 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
8052 "total_states %d peak_states %d mark_read %d\n",
8053 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
8054 env->max_states_per_insn, env->total_states,
8055 env->peak_states, env->longest_mark_read_walk);
7823} 8056}
7824 8057
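The summary line is emitted whenever a verifier log was requested; with BPF_LOG_STATS also set, the verification time and per-subprogram stack depths are printed first. Illustrative output with hypothetical figures:

    processed 6034 insns (limit 1000000) max_states_per_insn 4 total_states 125 peak_states 125 mark_read 30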
7825int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, 8058int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7826 union bpf_attr __user *uattr) 8059 union bpf_attr __user *uattr)
7827{ 8060{
8061 u64 start_time = ktime_get_ns();
7828 struct bpf_verifier_env *env; 8062 struct bpf_verifier_env *env;
7829 struct bpf_verifier_log *log; 8063 struct bpf_verifier_log *log;
7830 int i, len, ret = -EINVAL; 8064 int i, len, ret = -EINVAL;
@@ -7866,8 +8100,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7866 8100
7867 ret = -EINVAL; 8101 ret = -EINVAL;
7868 /* log attributes have to be sane */ 8102 /* log attributes have to be sane */
7869 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || 8103 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
7870 !log->level || !log->ubuf) 8104 !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
7871 goto err_unlock; 8105 goto err_unlock;
7872 } 8106 }
7873 8107
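The BPF_LOG_* masks used throughout these hunks replace the old numeric comparisons on log->level. A sketch of the expected bit layout, assumed here to live in include/linux/bpf_verifier.h next to the existing log helpers:

    #define BPF_LOG_LEVEL1  1   /* basic state dumps, former log_level >= 1 */
    #define BPF_LOG_LEVEL2  2   /* per-insn state dumps, former log_level > 1 */
    #define BPF_LOG_STATS   4   /* verification statistics only */
    #define BPF_LOG_LEVEL   (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
    #define BPF_LOG_MASK    (BPF_LOG_LEVEL | BPF_LOG_STATS)

Assuming these values, a loader opts into the statistics by OR-ing 4 into the prog-load log_level attribute on top of whichever verbosity it already requests.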
@@ -7890,7 +8124,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7890 goto skip_full_check; 8124 goto skip_full_check;
7891 } 8125 }
7892 8126
7893 env->explored_states = kcalloc(env->prog->len, 8127 env->explored_states = kvcalloc(env->prog->len,
7894 sizeof(struct bpf_verifier_state_list *), 8128 sizeof(struct bpf_verifier_state_list *),
7895 GFP_USER); 8129 GFP_USER);
7896 ret = -ENOMEM; 8130 ret = -ENOMEM;
@@ -7948,6 +8182,9 @@ skip_full_check:
7948 if (ret == 0) 8182 if (ret == 0)
7949 ret = fixup_call_args(env); 8183 ret = fixup_call_args(env);
7950 8184
8185 env->verification_time = ktime_get_ns() - start_time;
8186 print_verification_stats(env);
8187
7951 if (log->level && bpf_verifier_log_full(log)) 8188 if (log->level && bpf_verifier_log_full(log))
7952 ret = -ENOSPC; 8189 ret = -ENOSPC;
7953 if (log->level && !log->ubuf) { 8190 if (log->level && !log->ubuf) {