Diffstat (limited to 'kernel/bpf')
 kernel/bpf/btf.c             | 17
 kernel/bpf/cgroup.c          |  3
 kernel/bpf/hashtab.c         |  4
 kernel/bpf/lpm_trie.c        |  1
 kernel/bpf/map_in_map.c      | 17
 kernel/bpf/percpu_freelist.c | 41
 kernel/bpf/percpu_freelist.h |  4
 kernel/bpf/stackmap.c        | 20
 kernel/bpf/syscall.c         | 12
 kernel/bpf/verifier.c        | 72
 10 files changed, 143 insertions(+), 48 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 715f9fcf4712..c57bd10340ed 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
         return kind_ops[BTF_INFO_KIND(t->info)];
 }
 
-bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
         return BTF_STR_OFFSET_VALID(offset) &&
                 offset < btf->hdr.str_len;
@@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
         u8 nr_copy_bits;
         u64 print_num;
 
-        data += BITS_ROUNDDOWN_BYTES(bits_offset);
-        bits_offset = BITS_PER_BYTE_MASKED(bits_offset);
         nr_copy_bits = nr_bits + bits_offset;
         nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
@@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf,
          * BTF_INT_OFFSET() cannot exceed 64 bits.
          */
         total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
-        btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m);
+        data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+        bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+        btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
 }
 
 static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
@@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 
         /* "typedef void new_void", "const void"...etc */
         if (!btf_type_is_void(next_type) &&
-            !btf_type_is_fwd(next_type)) {
+            !btf_type_is_fwd(next_type) &&
+            !btf_type_is_func_proto(next_type)) {
                 btf_verifier_log_type(env, v->t, "Invalid type_id");
                 return -EINVAL;
         }
@@ -2001,12 +2002,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 
                 member_offset = btf_member_bit_offset(t, member);
                 bitfield_size = btf_member_bitfield_size(t, member);
+                bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+                bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
                 if (bitfield_size) {
-                        btf_bitfield_seq_show(data, member_offset,
+                        btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
                                               bitfield_size, m);
                 } else {
-                        bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
-                        bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
                         ops = btf_type_ops(member_type);
                         ops->seq_show(btf, member_type, member->type,
                                       data + bytes_offset, bits8_offset, m);
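
The btf.c pretty-print hunks all serve one fix: btf_bitfield_seq_show() takes its bits_offset as a u8, so each caller now splits a potentially large total bit offset into a byte offset plus a sub-byte remainder before the call, instead of letting the helper do the split after the value has already been narrowed. A standalone sketch of that split; the macro bodies are assumptions mirroring the kernel's intent:

#include <stdint.h>

/* Assumed equivalents of the kernel macros used above. */
#define BITS_ROUNDDOWN_BYTES(bits)      ((bits) >> 3)
#define BITS_PER_BYTE_MASKED(bits)      ((bits) & 7)

static void show_bitfield(const void *data, uint32_t total_bits_offset)
{
        /* Split in 32 bits *before* narrowing: a struct member sitting
         * at bit 300 yields byte 37 + 4 bits, whereas passing 300
         * through a u8 parameter first would silently wrap it to 44.
         */
        const uint8_t *p = (const uint8_t *)data +
                           BITS_ROUNDDOWN_BYTES(total_bits_offset);
        uint8_t bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);

        (void)p;
        (void)bits_offset;      /* ... decode nr_bits starting here ... */
}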
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..d17d05570a3f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
                 bpf_compute_and_save_data_end(skb, &saved_data_end);
 
                 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-                                         bpf_prog_run_save_cb);
+                                         __bpf_prog_run_save_cb);
                 bpf_restore_data_end(skb, saved_data_end);
                 __skb_pull(skb, offset);
                 skb->sk = save_sk;
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
         case BPF_FUNC_trace_printk:
                 if (capable(CAP_SYS_ADMIN))
                         return bpf_get_trace_printk_proto();
+                /* fall through */
         default:
                 return NULL;
         }
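
Two independent fixes here: the run-array call switches to __bpf_prog_run_save_cb(), the underscored variant that assumes its caller has already disabled preemption (which BPF_PROG_RUN_ARRAY is expected to do), avoiding a nested disable; and the new comment annotates an intentional switch fall-through for -Wimplicit-fallthrough. A compile-able illustration of the annotation pattern; the function and values are made up:

/* With -Wimplicit-fallthrough, the comment (or the `fallthrough`
 * keyword in later kernels) marks the drop into `default:` as
 * deliberate rather than a forgotten break.
 */
static const void *pick_proto(int func_id, int admin)
{
        switch (func_id) {
        case 6:                         /* e.g. BPF_FUNC_trace_printk */
                if (admin)
                        return "trace_printk proto";
                /* fall through */
        default:
                return 0;
        }
}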
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..f9274114c88d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
         }
 
         if (htab_is_prealloc(htab)) {
-                pcpu_freelist_push(&htab->freelist, &l->fnode);
+                __pcpu_freelist_push(&htab->freelist, &l->fnode);
         } else {
                 atomic_dec(&htab->count);
                 l->htab = htab;
@@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
         } else {
                 struct pcpu_freelist_node *l;
 
-                l = pcpu_freelist_pop(&htab->freelist);
+                l = __pcpu_freelist_pop(&htab->freelist);
                 if (!l)
                         return ERR_PTR(-E2BIG);
                 l_new = container_of(l, struct htab_elem, fnode);
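
The hash-table paths switch to the double-underscore freelist entry points, presumably because these paths already run with IRQs disabled, so the irqsave moves into one place (see the percpu_freelist.c rework below). The container_of() idiom that maps the popped freelist node back to its element is worth restating standalone; the demo types are made up:

#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct pcpu_freelist_node_demo { struct pcpu_freelist_node_demo *next; };

struct htab_elem_demo {
        int hash;
        struct pcpu_freelist_node_demo fnode;   /* embedded, not pointed-to */
};

/* Recover the element that embeds a given freelist node. */
static struct htab_elem_demo *elem_from_node(struct pcpu_freelist_node_demo *l)
{
        return container_of(l, struct htab_elem_demo, fnode);
}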
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index abf1002080df..93a5cbbde421 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -471,6 +471,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
         }
 
         if (!node || node->prefixlen != key->prefixlen ||
+            node->prefixlen != matchlen ||
             (node->flags & LPM_TREE_NODE_FLAG_IM)) {
                 ret = -ENOENT;
                 goto out;
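
The extra node->prefixlen != matchlen test makes delete require an exact match: during descent, matchlen records how many bits of the stored prefix actually matched the key, and a node can share the key's prefix *length* without its prefix *bits* matching all the way. A standalone restatement of the delete guard; the flag value is illustrative:

#include <stdbool.h>
#include <stdint.h>

#define LPM_NODE_IM     0x1     /* intermediate node, illustrative value */

struct lpm_node_demo { uint32_t prefixlen; uint32_t flags; };

static bool exact_leaf_match(const struct lpm_node_demo *node,
                             uint32_t key_prefixlen, uint32_t matchlen)
{
        return node && node->prefixlen == key_prefixlen &&
               node->prefixlen == matchlen &&
               !(node->flags & LPM_NODE_IM);
}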
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@
 struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 {
         struct bpf_map *inner_map, *inner_map_meta;
+        u32 inner_map_meta_size;
         struct fd f;
 
         f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
                 return ERR_PTR(-EINVAL);
         }
 
-        inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+        inner_map_meta_size = sizeof(*inner_map_meta);
+        /* In some cases verifier needs to access beyond just base map. */
+        if (inner_map->ops == &array_map_ops)
+                inner_map_meta_size = sizeof(struct bpf_array);
+
+        inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
         if (!inner_map_meta) {
                 fdput(f);
                 return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
         inner_map_meta->key_size = inner_map->key_size;
         inner_map_meta->value_size = inner_map->value_size;
         inner_map_meta->map_flags = inner_map->map_flags;
-        inner_map_meta->ops = inner_map->ops;
         inner_map_meta->max_entries = inner_map->max_entries;
 
+        /* Misc members not needed in bpf_map_meta_equal() check. */
+        inner_map_meta->ops = inner_map->ops;
+        if (inner_map->ops == &array_map_ops) {
+                inner_map_meta->unpriv_array = inner_map->unpriv_array;
+                container_of(inner_map_meta, struct bpf_array, map)->index_mask =
+                     container_of(inner_map, struct bpf_array, map)->index_mask;
+        }
+
         fdput(f);
         return inner_map_meta;
 }
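
Copying unpriv_array and index_mask into the meta map matters because the verifier consults the meta map, not the eventual inner map, when it instruments inner-array accesses against Spectre v1; for bpf_array, index_mask is the rounded-up-power-of-two bound minus one. A standalone sketch of what the mask buys at runtime; the helper is illustrative:

#include <stdint.h>

/* Round v up to the next power of two (v >= 1); illustrative helper. */
static uint32_t roundup_pow2(uint32_t v)
{
        uint32_t p = 1;

        while (p < v)
                p <<= 1;
        return p;
}

/* Even if the bounds check is mispredicted speculatively, the AND
 * keeps the effective index inside the power-of-two padded allocation.
 */
static uint64_t masked_load(const uint64_t *values, uint32_t max_entries,
                            uint32_t index)
{
        uint32_t index_mask = roundup_pow2(max_entries) - 1;

        if (index >= max_entries)
                return 0;
        return values[index & index_mask];
}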
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 673fa6fe2d73..0c1b4ba9e90e 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
         free_percpu(s->freelist);
 }
 
-static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
-                                        struct pcpu_freelist_node *node)
+static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
+                                         struct pcpu_freelist_node *node)
 {
         raw_spin_lock(&head->lock);
         node->next = head->first;
@@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
         raw_spin_unlock(&head->lock);
 }
 
-void pcpu_freelist_push(struct pcpu_freelist *s,
+void __pcpu_freelist_push(struct pcpu_freelist *s,
                         struct pcpu_freelist_node *node)
 {
         struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
 
-        __pcpu_freelist_push(head, node);
+        ___pcpu_freelist_push(head, node);
+}
+
+void pcpu_freelist_push(struct pcpu_freelist *s,
+                        struct pcpu_freelist_node *node)
+{
+        unsigned long flags;
+
+        local_irq_save(flags);
+        __pcpu_freelist_push(s, node);
+        local_irq_restore(flags);
 }
 
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
@@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
         for_each_possible_cpu(cpu) {
 again:
                 head = per_cpu_ptr(s->freelist, cpu);
-                __pcpu_freelist_push(head, buf);
+                ___pcpu_freelist_push(head, buf);
                 i++;
                 buf += elem_size;
                 if (i == nr_elems)
@@ -74,14 +84,12 @@ again:
         local_irq_restore(flags);
 }
 
-struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
 {
         struct pcpu_freelist_head *head;
         struct pcpu_freelist_node *node;
-        unsigned long flags;
         int orig_cpu, cpu;
 
-        local_irq_save(flags);
         orig_cpu = cpu = raw_smp_processor_id();
         while (1) {
                 head = per_cpu_ptr(s->freelist, cpu);
@@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
                 node = head->first;
                 if (node) {
                         head->first = node->next;
-                        raw_spin_unlock_irqrestore(&head->lock, flags);
+                        raw_spin_unlock(&head->lock);
                         return node;
                 }
                 raw_spin_unlock(&head->lock);
                 cpu = cpumask_next(cpu, cpu_possible_mask);
                 if (cpu >= nr_cpu_ids)
                         cpu = 0;
-                if (cpu == orig_cpu) {
-                        local_irq_restore(flags);
+                if (cpu == orig_cpu)
                         return NULL;
-                }
         }
 }
+
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+        struct pcpu_freelist_node *ret;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        ret = __pcpu_freelist_pop(s);
+        local_irq_restore(flags);
+        return ret;
+}
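
The rework layers three entry points: ___pcpu_freelist_push() locks one list head, __pcpu_freelist_push()/__pcpu_freelist_pop() operate on the current CPU's head and assume the caller has IRQs off, and the plain pcpu_freelist_push()/pop() wrap them in local_irq_save()/restore(). A userspace analogue of the layering, with signal blocking standing in for IRQ disabling; all names and types here are illustrative:

#include <pthread.h>
#include <signal.h>

struct node { struct node *next; };
struct head { pthread_mutex_t lock; struct node *first; };

static struct head freelist = { PTHREAD_MUTEX_INITIALIZER, 0 };

/* innermost: lock a specific head and link the node */
static void ___push(struct head *h, struct node *n)
{
        pthread_mutex_lock(&h->lock);
        n->next = h->first;
        h->first = n;
        pthread_mutex_unlock(&h->lock);
}

/* __push: caller promises "interrupts" are already blocked */
static void __push(struct node *n)
{
        ___push(&freelist, n);
}

/* push: blocks and restores "interrupts" (signals) around __push */
static void push(struct node *n)
{
        sigset_t all, old;

        sigfillset(&all);
        pthread_sigmask(SIG_BLOCK, &all, &old);
        __push(n);
        pthread_sigmask(SIG_SETMASK, &old, NULL);
}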
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index 3049aae8ea1e..c3960118e617 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -22,8 +22,12 @@ struct pcpu_freelist_node {
         struct pcpu_freelist_node *next;
 };
 
+/* pcpu_freelist_* do spin_lock_irqsave. */
 void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
 struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
+/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */
+void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
                             u32 nr_elems);
 int pcpu_freelist_init(struct pcpu_freelist *);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 90daf285de03..950ab2f28922 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -44,7 +44,7 @@ static void do_up_read(struct irq_work *entry)
         struct stack_map_irq_work *work;
 
         work = container_of(entry, struct stack_map_irq_work, irq_work);
-        up_read(work->sem);
+        up_read_non_owner(work->sem);
         work->sem = NULL;
 }
 
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
 
                 if (nhdr->n_type == BPF_BUILD_ID &&
                     nhdr->n_namesz == sizeof("GNU") &&
-                    nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+                    nhdr->n_descsz > 0 &&
+                    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
                         memcpy(build_id,
                                note_start + note_offs +
                                ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
-                               BPF_BUILD_ID_SIZE);
+                               nhdr->n_descsz);
+                        memset(build_id + nhdr->n_descsz, 0,
+                               BPF_BUILD_ID_SIZE - nhdr->n_descsz);
                         return 0;
                 }
                 new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
                 return -EFAULT; /* page not mapped */
 
         ret = -EINVAL;
-        page_addr = page_address(page);
+        page_addr = kmap_atomic(page);
         ehdr = (Elf32_Ehdr *)page_addr;
 
         /* compare magic x7f "ELF" */
@@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
         else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
                 ret = stack_map_get_build_id_64(page_addr, build_id);
 out:
+        kunmap_atomic(page_addr);
         put_page(page);
         return ret;
 }
@@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
                 for (i = 0; i < trace_nr; i++) {
                         id_offs[i].status = BPF_STACK_BUILD_ID_IP;
                         id_offs[i].ip = ips[i];
+                        memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
                 }
                 return;
         }
@@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
                         /* per entry fall back to ips */
                         id_offs[i].status = BPF_STACK_BUILD_ID_IP;
                         id_offs[i].ip = ips[i];
+                        memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
                         continue;
                 }
                 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
@@ -332,6 +338,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
         } else {
                 work->sem = &current->mm->mmap_sem;
                 irq_work_queue(&work->irq_work);
+                /*
+                 * The irq_work will release the mmap_sem with
+                 * up_read_non_owner(). The rwsem_release() is called
+                 * here to release the lock from lockdep's perspective.
+                 */
+                rwsem_release(&current->mm->mmap_sem.dep_map, 1, _RET_IP_);
         }
 }
 
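
Besides the lockdep bookkeeping (up_read_non_owner() in the deferred irq_work paired with an explicit rwsem_release() at queue time, since a context other than the acquirer releases the lock), the build-id change replaces an exact n_descsz == BPF_BUILD_ID_SIZE test with a bounded copy plus zero-fill, so shorter but valid build-id notes are accepted without leaking stale destination bytes. The same pattern, standalone; the 20-byte size is an assumption matching a SHA-1 build-id:

#include <stdint.h>
#include <string.h>

#define BUILD_ID_SIZE   20      /* assumed: SHA-1 build-id length */

static int copy_build_id(unsigned char dst[BUILD_ID_SIZE],
                         const unsigned char *desc, uint32_t descsz)
{
        if (descsz == 0 || descsz > BUILD_ID_SIZE)
                return -1;              /* mirrors the new n_descsz checks */
        memcpy(dst, desc, descsz);
        memset(dst + descsz, 0, BUILD_ID_SIZE - descsz);
        return 0;
}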
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..8577bb7f8be6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 
         if (bpf_map_is_dev_bound(map)) {
                 err = bpf_map_offload_lookup_elem(map, key, value);
-        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-                   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+                goto done;
+        }
+
+        preempt_disable();
+        this_cpu_inc(bpf_prog_active);
+        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                 err = bpf_percpu_hash_copy(map, key, value);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                 err = bpf_percpu_array_copy(map, key, value);
@@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr)
                 }
                 rcu_read_unlock();
         }
+        this_cpu_dec(bpf_prog_active);
+        preempt_enable();
 
+done:
         if (err)
                 goto free_value;
 
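
Wrapping the copies in preempt_disable() + this_cpu_inc(bpf_prog_active) keeps the lookup pinned to one CPU and lets re-entrant BPF programs (for example a kprobe program firing inside the copy) see the counter raised and back off. A thread-local userspace analogue of the recursion guard; names are illustrative:

#include <stdbool.h>

static __thread int prog_active;        /* stand-in for per-CPU bpf_prog_active */

static bool enter_map_op(void)
{
        if (prog_active)                /* re-entered: refuse to recurse */
                return false;
        prog_active++;
        return true;
}

static void exit_map_op(void)
{
        prog_active--;
}

/* usage: if (enter_map_op()) { ...copy value...; exit_map_op(); } */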
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6bc62a9ee8e..8f295b790297 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1617,12 +1617,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
         return 0;
 }
 
-static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
-                             int size, enum bpf_access_type t)
+static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
+                             u32 regno, int off, int size,
+                             enum bpf_access_type t)
 {
         struct bpf_reg_state *regs = cur_regs(env);
         struct bpf_reg_state *reg = &regs[regno];
-        struct bpf_insn_access_aux info;
+        struct bpf_insn_access_aux info = {};
 
         if (reg->smin_value < 0) {
                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1636,6 +1637,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
                 return -EACCES;
         }
 
+        env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+
         return 0;
 }
 
@@ -2032,7 +2035,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                         verbose(env, "cannot write into socket\n");
                         return -EACCES;
                 }
-                err = check_sock_access(env, regno, off, size, t);
+                err = check_sock_access(env, insn_idx, regno, off, size, t);
                 if (!err && value_regno >= 0)
                         mark_reg_unknown(env, regs, value_regno);
         } else {
@@ -3103,6 +3106,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
         }
 }
 
+static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
+                                    const struct bpf_insn *insn)
+{
+        return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+}
+
+static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
+                                       u32 alu_state, u32 alu_limit)
+{
+        /* If we arrived here from different branches with different
+         * state or limits to sanitize, then this won't work.
+         */
+        if (aux->alu_state &&
+            (aux->alu_state != alu_state ||
+             aux->alu_limit != alu_limit))
+                return -EACCES;
+
+        /* Corresponding fixup done in fixup_bpf_calls(). */
+        aux->alu_state = alu_state;
+        aux->alu_limit = alu_limit;
+        return 0;
+}
+
+static int sanitize_val_alu(struct bpf_verifier_env *env,
+                            struct bpf_insn *insn)
+{
+        struct bpf_insn_aux_data *aux = cur_aux(env);
+
+        if (can_skip_alu_sanitation(env, insn))
+                return 0;
+
+        return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
                             struct bpf_insn *insn,
                             const struct bpf_reg_state *ptr_reg,
@@ -3117,7 +3154,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
         struct bpf_reg_state tmp;
         bool ret;
 
-        if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
+        if (can_skip_alu_sanitation(env, insn))
                 return 0;
 
         /* We already marked aux for masking from non-speculative
@@ -3133,19 +3170,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 
         if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
                 return 0;
-
-        /* If we arrived here from different branches with different
-         * limits to sanitize, then this won't work.
-         */
-        if (aux->alu_state &&
-            (aux->alu_state != alu_state ||
-             aux->alu_limit != alu_limit))
-                return -EACCES;
-
-        /* Corresponding fixup done in fixup_bpf_calls(). */
-        aux->alu_state = alu_state;
-        aux->alu_limit = alu_limit;
-
+        if (update_alu_sanitation_state(aux, alu_state, alu_limit))
+                return -EACCES;
 do_sim:
         /* Simulate and find potential out-of-bounds access under
          * speculative execution from truncation as a result of
@@ -3418,6 +3444,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
         s64 smin_val, smax_val;
         u64 umin_val, umax_val;
         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+        u32 dst = insn->dst_reg;
+        int ret;
 
         if (insn_bitness == 32) {
                 /* Relevant for 32-bit RSH: Information can propagate towards
@@ -3452,6 +3480,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 
         switch (opcode) {
         case BPF_ADD:
+                ret = sanitize_val_alu(env, insn);
+                if (ret < 0) {
+                        verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
+                        return ret;
+                }
                 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
                     signed_add_overflows(dst_reg->smax_value, smax_val)) {
                         dst_reg->smin_value = S64_MIN;
@@ -3471,6 +3504,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
                 break;
         case BPF_SUB:
+                ret = sanitize_val_alu(env, insn);
+                if (ret < 0) {
+                        verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
+                        return ret;
+                }
                 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
                     signed_sub_overflows(dst_reg->smax_value, smin_val)) {
                         /* Overflow possible, we know nothing */
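
The refactor splits the per-instruction bookkeeping out of sanitize_ptr_alu() so scalar ALU ops can record BPF_ALU_NON_POINTER through the same helper; update_alu_sanitation_state()'s rule is that one instruction must be patched the same way on every verifier path that reaches it. A standalone restatement of that rule, with demo types:

#include <stdint.h>

struct insn_aux_demo { uint32_t alu_state; uint32_t alu_limit; };

/* One patched instruction, one (state, limit) pair: if two verifier
 * paths disagree about how to mask this insn, reject the program.
 */
static int record_sanitation(struct insn_aux_demo *aux,
                             uint32_t alu_state, uint32_t alu_limit)
{
        if (aux->alu_state &&
            (aux->alu_state != alu_state || aux->alu_limit != alu_limit))
                return -1;      /* -EACCES in the kernel */

        aux->alu_state = alu_state;
        aux->alu_limit = alu_limit;
        return 0;
}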