Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/btf.c              17
-rw-r--r--  kernel/bpf/cgroup.c            3
-rw-r--r--  kernel/bpf/hashtab.c           4
-rw-r--r--  kernel/bpf/lpm_trie.c          1
-rw-r--r--  kernel/bpf/map_in_map.c       17
-rw-r--r--  kernel/bpf/percpu_freelist.c  41
-rw-r--r--  kernel/bpf/percpu_freelist.h   4
-rw-r--r--  kernel/bpf/stackmap.c         20
-rw-r--r--  kernel/bpf/syscall.c          12
-rw-r--r--  kernel/bpf/verifier.c         72
10 files changed, 143 insertions(+), 48 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 715f9fcf4712..c57bd10340ed 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 	return kind_ops[BTF_INFO_KIND(t->info)];
 }
 
-bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
 	return BTF_STR_OFFSET_VALID(offset) &&
 		offset < btf->hdr.str_len;
@@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
 	u8 nr_copy_bits;
 	u64 print_num;
 
-	data += BITS_ROUNDDOWN_BYTES(bits_offset);
-	bits_offset = BITS_PER_BYTE_MASKED(bits_offset);
 	nr_copy_bits = nr_bits + bits_offset;
 	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
@@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 	 * BTF_INT_OFFSET() cannot exceed 64 bits.
 	 */
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
-	btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m);
+	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+	btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
 }
 
 static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
@@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 
 	/* "typedef void new_void", "const void"...etc */
 	if (!btf_type_is_void(next_type) &&
-	    !btf_type_is_fwd(next_type)) {
+	    !btf_type_is_fwd(next_type) &&
+	    !btf_type_is_func_proto(next_type)) {
 		btf_verifier_log_type(env, v->t, "Invalid type_id");
 		return -EINVAL;
 	}
@@ -2001,12 +2002,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 
 		member_offset = btf_member_bit_offset(t, member);
 		bitfield_size = btf_member_bitfield_size(t, member);
+		bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+		bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
 		if (bitfield_size) {
-			btf_bitfield_seq_show(data, member_offset,
+			btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
 					      bitfield_size, m);
 		} else {
-			bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
-			bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
 			ops = btf_type_ops(member_type);
 			ops->seq_show(btf, member_type, member->type,
 				      data + bytes_offset, bits8_offset, m);
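Aside (not part of the patch): a worked sketch of the bit-offset split the callers now perform before btf_bitfield_seq_show(), assuming the BITS_* helpers in btf.c are the usual divide/modulo-by-8 macros.

static const void *split_bit_offset(const void *data, u32 bit_offset, u8 *bits8)
{
	*bits8 = bit_offset & 7;			/* BITS_PER_BYTE_MASKED(): bits left inside the byte */
	return (const char *)data + (bit_offset >> 3);	/* BITS_ROUNDDOWN_BYTES(): whole bytes to skip */
}

For example, a bitfield member at total bit offset 17 is now passed in as data + 2 with bits_offset 1, so the callee only ever sees a sub-byte bits_offset and its u8 nr_copy_bits arithmetic cannot overflow.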
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..d17d05570a3f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 bpf_prog_run_save_cb);
+				 __bpf_prog_run_save_cb);
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_trace_printk:
 		if (capable(CAP_SYS_ADMIN))
 			return bpf_get_trace_printk_proto();
+		/* fall through */
 	default:
 		return NULL;
 	}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..f9274114c88d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 	}
 
 	if (htab_is_prealloc(htab)) {
-		pcpu_freelist_push(&htab->freelist, &l->fnode);
+		__pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
 		l->htab = htab;
@@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 	} else {
 		struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
+		l = __pcpu_freelist_pop(&htab->freelist);
 		if (!l)
 			return ERR_PTR(-E2BIG);
 		l_new = container_of(l, struct htab_elem, fnode);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index abf1002080df..93a5cbbde421 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -471,6 +471,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
 	}
 
 	if (!node || node->prefixlen != key->prefixlen ||
+	    node->prefixlen != matchlen ||
 	    (node->flags & LPM_TREE_NODE_FLAG_IM)) {
 		ret = -ENOENT;
 		goto out;
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@
 struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 {
 	struct bpf_map *inner_map, *inner_map_meta;
+	u32 inner_map_meta_size;
 	struct fd f;
 
 	f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 		return ERR_PTR(-EINVAL);
 	}
 
-	inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+	inner_map_meta_size = sizeof(*inner_map_meta);
+	/* In some cases verifier needs to access beyond just base map. */
+	if (inner_map->ops == &array_map_ops)
+		inner_map_meta_size = sizeof(struct bpf_array);
+
+	inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
 	if (!inner_map_meta) {
 		fdput(f);
 		return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	inner_map_meta->key_size = inner_map->key_size;
 	inner_map_meta->value_size = inner_map->value_size;
 	inner_map_meta->map_flags = inner_map->map_flags;
-	inner_map_meta->ops = inner_map->ops;
 	inner_map_meta->max_entries = inner_map->max_entries;
 
+	/* Misc members not needed in bpf_map_meta_equal() check. */
+	inner_map_meta->ops = inner_map->ops;
+	if (inner_map->ops == &array_map_ops) {
+		inner_map_meta->unpriv_array = inner_map->unpriv_array;
+		container_of(inner_map_meta, struct bpf_array, map)->index_mask =
+		     container_of(inner_map, struct bpf_array, map)->index_mask;
+	}
+
 	fdput(f);
 	return inner_map_meta;
 }
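For background, a simplified sketch (field names are from struct bpf_array; the real lookup code differs) of why unpriv_array and index_mask must survive into the inner-map meta: the array map lookup path masks the index so that even a mispredicted bounds check cannot read out of bounds, and the verifier relies on the meta map carrying the same mask when it handles lookups on the inner map.

static void *array_elem_sketch(struct bpf_array *array, u32 index)
{
	if (index >= array->map.max_entries)
		return NULL;
	index &= array->index_mask;	/* speculation-safe clamp to the rounded-up size */
	return array->value + (u64)array->elem_size * index;
}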
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 673fa6fe2d73..0c1b4ba9e90e 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
 	free_percpu(s->freelist);
 }
 
-static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
-					struct pcpu_freelist_node *node)
+static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
+					 struct pcpu_freelist_node *node)
 {
 	raw_spin_lock(&head->lock);
 	node->next = head->first;
@@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
 	raw_spin_unlock(&head->lock);
 }
 
-void pcpu_freelist_push(struct pcpu_freelist *s,
+void __pcpu_freelist_push(struct pcpu_freelist *s,
 			struct pcpu_freelist_node *node)
 {
 	struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
 
-	__pcpu_freelist_push(head, node);
+	___pcpu_freelist_push(head, node);
+}
+
+void pcpu_freelist_push(struct pcpu_freelist *s,
+			struct pcpu_freelist_node *node)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__pcpu_freelist_push(s, node);
+	local_irq_restore(flags);
 }
 
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
@@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
 	for_each_possible_cpu(cpu) {
 again:
 		head = per_cpu_ptr(s->freelist, cpu);
-		__pcpu_freelist_push(head, buf);
+		___pcpu_freelist_push(head, buf);
 		i++;
 		buf += elem_size;
 		if (i == nr_elems)
@@ -74,14 +84,12 @@ again:
 	local_irq_restore(flags);
 }
 
-struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
 {
 	struct pcpu_freelist_head *head;
 	struct pcpu_freelist_node *node;
-	unsigned long flags;
 	int orig_cpu, cpu;
 
-	local_irq_save(flags);
 	orig_cpu = cpu = raw_smp_processor_id();
 	while (1) {
 		head = per_cpu_ptr(s->freelist, cpu);
@@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
 		node = head->first;
 		if (node) {
 			head->first = node->next;
-			raw_spin_unlock_irqrestore(&head->lock, flags);
+			raw_spin_unlock(&head->lock);
 			return node;
 		}
 		raw_spin_unlock(&head->lock);
 		cpu = cpumask_next(cpu, cpu_possible_mask);
 		if (cpu >= nr_cpu_ids)
 			cpu = 0;
-		if (cpu == orig_cpu) {
-			local_irq_restore(flags);
+		if (cpu == orig_cpu)
 			return NULL;
-		}
 	}
 }
+
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+	struct pcpu_freelist_node *ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = __pcpu_freelist_pop(s);
+	local_irq_restore(flags);
+	return ret;
+}
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index 3049aae8ea1e..c3960118e617 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -22,8 +22,12 @@ struct pcpu_freelist_node {
 	struct pcpu_freelist_node *next;
 };
 
+/* pcpu_freelist_* do spin_lock_irqsave. */
 void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
 struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
+/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */
+void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
 			    u32 nr_elems);
 int pcpu_freelist_init(struct pcpu_freelist *);
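Usage note (illustrative, not part of the patch): the new double-underscore variants take only the per-cpu spinlock, so the caller must already be running with IRQs disabled — which the hashtab call sites converted above are — or must disable them explicitly, which is exactly what the plain wrappers now do:

static struct pcpu_freelist_node *pop_with_irqs_off(struct pcpu_freelist *fl)
{
	struct pcpu_freelist_node *node;
	unsigned long flags;

	local_irq_save(flags);
	node = __pcpu_freelist_pop(fl);	/* spin_lock only; safe, IRQs are off */
	local_irq_restore(flags);
	return node;
}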
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 90daf285de03..950ab2f28922 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -44,7 +44,7 @@ static void do_up_read(struct irq_work *entry)
 	struct stack_map_irq_work *work;
 
 	work = container_of(entry, struct stack_map_irq_work, irq_work);
-	up_read(work->sem);
+	up_read_non_owner(work->sem);
 	work->sem = NULL;
 }
 
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
 
 		if (nhdr->n_type == BPF_BUILD_ID &&
 		    nhdr->n_namesz == sizeof("GNU") &&
-		    nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+		    nhdr->n_descsz > 0 &&
+		    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
 			memcpy(build_id,
 			       note_start + note_offs +
 			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
-			       BPF_BUILD_ID_SIZE);
+			       nhdr->n_descsz);
+			memset(build_id + nhdr->n_descsz, 0,
+			       BPF_BUILD_ID_SIZE - nhdr->n_descsz);
 			return 0;
 		}
 		new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
 		return -EFAULT;	/* page not mapped */
 
 	ret = -EINVAL;
-	page_addr = page_address(page);
+	page_addr = kmap_atomic(page);
 	ehdr = (Elf32_Ehdr *)page_addr;
 
 	/* compare magic x7f "ELF" */
@@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
 	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
 		ret = stack_map_get_build_id_64(page_addr, build_id);
 out:
+	kunmap_atomic(page_addr);
 	put_page(page);
 	return ret;
 }
@@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 		for (i = 0; i < trace_nr; i++) {
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
 			id_offs[i].ip = ips[i];
+			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 		}
 		return;
 	}
@@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			/* per entry fall back to ips */
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
 			id_offs[i].ip = ips[i];
+			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 			continue;
 		}
 		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
@@ -332,6 +338,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	} else {
 		work->sem = &current->mm->mmap_sem;
 		irq_work_queue(&work->irq_work);
+		/*
+		 * The irq_work will release the mmap_sem with
+		 * up_read_non_owner(). The rwsem_release() is called
+		 * here to release the lock from lockdep's perspective.
+		 */
+		rwsem_release(&current->mm->mmap_sem.dep_map, 1, _RET_IP_);
 	}
 }
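A short aside on the kmap_atomic() switch above (general pattern, illustrative only): page_address() is only valid for pages with a permanent kernel mapping, while kmap_atomic() creates a temporary one that also works for highmem pages, so the page has to be unmapped again on every exit path — hence the kunmap_atomic() added before put_page():

static void parse_first_page_sketch(struct page *page)
{
	void *addr = kmap_atomic(page);	/* temporary mapping, highmem-safe */

	/* ... inspect the ELF header/notes at addr; no sleeping while mapped ... */

	kunmap_atomic(addr);		/* drop the mapping before releasing the page */
	put_page(page);
}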
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..8577bb7f8be6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 
 	if (bpf_map_is_dev_bound(map)) {
 		err = bpf_map_offload_lookup_elem(map, key, value);
-	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		goto done;
+	}
+
+	preempt_disable();
+	this_cpu_inc(bpf_prog_active);
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value);
@@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 		}
 		rcu_read_unlock();
 	}
+	this_cpu_dec(bpf_prog_active);
+	preempt_enable();
 
+done:
 	if (err)
 		goto free_value;
 
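The new bracketing keeps BPF programs (e.g. tracing programs, which check bpf_prog_active and back off) from running on the local CPU while the syscall-side copy is in flight. A sketch of the pattern; do_map_copy() is a hypothetical stand-in for the per-type copy calls in the hunk above:

static int lookup_copy_sketch(struct bpf_map *map, void *key, void *value)
{
	int err;

	preempt_disable();
	this_cpu_inc(bpf_prog_active);	/* fence off BPF programs on this CPU */
	err = do_map_copy(map, key, value);
	this_cpu_dec(bpf_prog_active);
	preempt_enable();
	return err;
}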
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6bc62a9ee8e..8f295b790297 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1617,12 +1617,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
 	return 0;
 }
 
-static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
-			     int size, enum bpf_access_type t)
+static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
+			     u32 regno, int off, int size,
+			     enum bpf_access_type t)
 {
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = &regs[regno];
-	struct bpf_insn_access_aux info;
+	struct bpf_insn_access_aux info = {};
 
 	if (reg->smin_value < 0) {
 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1636,6 +1637,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
 		return -EACCES;
 	}
 
+	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+
 	return 0;
 }
 
@@ -2032,7 +2035,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
 			verbose(env, "cannot write into socket\n");
 			return -EACCES;
 		}
-		err = check_sock_access(env, regno, off, size, t);
+		err = check_sock_access(env, insn_idx, regno, off, size, t);
 		if (!err && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
 	} else {
@@ -3103,6 +3106,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 	}
 }
 
+static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
+				    const struct bpf_insn *insn)
+{
+	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+}
+
+static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
+				       u32 alu_state, u32 alu_limit)
+{
+	/* If we arrived here from different branches with different
+	 * state or limits to sanitize, then this won't work.
+	 */
+	if (aux->alu_state &&
+	    (aux->alu_state != alu_state ||
+	     aux->alu_limit != alu_limit))
+		return -EACCES;
+
+	/* Corresponding fixup done in fixup_bpf_calls(). */
+	aux->alu_state = alu_state;
+	aux->alu_limit = alu_limit;
+	return 0;
+}
+
+static int sanitize_val_alu(struct bpf_verifier_env *env,
+			    struct bpf_insn *insn)
+{
+	struct bpf_insn_aux_data *aux = cur_aux(env);
+
+	if (can_skip_alu_sanitation(env, insn))
+		return 0;
+
+	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 			    struct bpf_insn *insn,
 			    const struct bpf_reg_state *ptr_reg,
@@ -3117,7 +3154,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 	struct bpf_reg_state tmp;
 	bool ret;
 
-	if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
+	if (can_skip_alu_sanitation(env, insn))
 		return 0;
 
 	/* We already marked aux for masking from non-speculative
@@ -3133,19 +3170,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 
 	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
 		return 0;
-
-	/* If we arrived here from different branches with different
-	 * limits to sanitize, then this won't work.
-	 */
-	if (aux->alu_state &&
-	    (aux->alu_state != alu_state ||
-	     aux->alu_limit != alu_limit))
-		return -EACCES;
-
-	/* Corresponding fixup done in fixup_bpf_calls(). */
-	aux->alu_state = alu_state;
-	aux->alu_limit = alu_limit;
-
+	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
+		return -EACCES;
 do_sim:
 	/* Simulate and find potential out-of-bounds access under
 	 * speculative execution from truncation as a result of
@@ -3418,6 +3444,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	s64 smin_val, smax_val;
 	u64 umin_val, umax_val;
 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+	u32 dst = insn->dst_reg;
+	int ret;
 
 	if (insn_bitness == 32) {
 		/* Relevant for 32-bit RSH: Information can propagate towards
@@ -3452,6 +3480,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 
 	switch (opcode) {
 	case BPF_ADD:
+		ret = sanitize_val_alu(env, insn);
+		if (ret < 0) {
+			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
+			return ret;
+		}
 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
 			dst_reg->smin_value = S64_MIN;
@@ -3471,6 +3504,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
 		break;
 	case BPF_SUB:
+		ret = sanitize_val_alu(env, insn);
+		if (ret < 0) {
+			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
+			return ret;
+		}
 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
 		    signed_sub_overflows(dst_reg->smin_value, smin_val) ||
 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
 			/* Overflow possible, we know nothing */
