author		David S. Miller <davem@davemloft.net>	2018-10-22 00:11:46 -0400
committer	David S. Miller <davem@davemloft.net>	2018-10-22 00:11:46 -0400
commit		a19c59cc10a5ebc6b5a542e56bfd9f427ce01d74 (patch)
tree		cd04c1af4e800eef175cbc51ffb6e78040d7ee27 /kernel/bpf
parent		92303c86b7e9b7d3895ccafb441a0354143e2a18 (diff)
parent		fe8ecccc10b3adc071de05ca7af728ca1a4ac9aa (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-21

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Implement two new kinds of BPF maps, namely queue and stack maps, along
   with new peek, push and pop operations, from Mauricio.

2) Add support for the MSG_PEEK flag when redirecting into an ingress psock
   sk_msg queue, and add a new helper bpf_msg_push_data() for inserting data
   into the message, from John.

3) Allow BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to use direct packet
   access for __sk_buff, from Song.

4) Use more lightweight barriers for walking the perf ring buffer in libbpf
   and the perf tool. Also, various fixes and improvements on the verifier
   side, from Daniel.

5) Add per-symbol visibility for DSOs in libbpf and hide global symbols such
   as the netlink-related functions by default, from Andrey.

6) Two improvements to nfp's BPF offload: check vNIC capabilities when a
   prog is shared with multiple vNICs, and protect against mis-initializing
   atomic counters, from Jakub.

7) Fix bpftool to use 4-context mode for the nfp disassembler, also from
   Jakub.

8) Fix a return value comparison in test_libbpf.sh and add several bpftool
   improvements in bash completion, documentation of bpf fs restrictions and
   batch mode summary print, from Quentin.

9) Fix a file resource leak in the BPF selftests' load_kallsyms() helper,
   from Peng.

10) Fix an unused variable warning in map_lookup_and_delete_elem(), from
    Alexei.

11) Fix the bpf_skb_adjust_room() signature in the BPF UAPI helper doc, from
    Nicolas.

12) Add missing executables to .gitignore in the BPF selftests, from Anders.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
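For readers unfamiliar with the new map type, a rough sketch of how a BPF program might drive the queue map helpers introduced by this series (illustrative only, not part of the commit; it assumes the struct bpf_map_def / SEC() conventions and the bpf_helpers.h wrappers used by samples and selftests at the time):

	/* Illustrative sketch, not part of this commit. */
	#include <linux/bpf.h>
	#include "bpf_helpers.h"	/* assumed selftest/sample helper header */

	struct bpf_map_def SEC("maps") msg_queue = {
		.type        = BPF_MAP_TYPE_QUEUE,
		.key_size    = 0,		/* queue/stack maps take no key */
		.value_size  = sizeof(__u32),
		.max_entries = 128,
	};

	SEC("xdp")
	int queue_demo(struct xdp_md *ctx)
	{
		__u32 val = 1;

		/* flags 0: fail with -E2BIG when full; BPF_EXIST would instead
		 * overwrite the oldest element.
		 */
		if (bpf_map_push_elem(&msg_queue, &val, 0))
			return XDP_PASS;

		/* read the front without removing it, then consume it */
		if (bpf_map_peek_elem(&msg_queue, &val) == 0)
			bpf_map_pop_elem(&msg_queue, &val);

		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";

Userspace can drive the same map through BPF_MAP_UPDATE_ELEM / BPF_MAP_LOOKUP_ELEM with a NULL key, or through the new BPF_MAP_LOOKUP_AND_DELETE_ELEM command shown further down in the syscall.c diff.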
Diffstat (limited to 'kernel/bpf')
 kernel/bpf/Makefile           |   2
 kernel/bpf/cgroup.c           |   6
 kernel/bpf/core.c             |   3
 kernel/bpf/helpers.c          |  43
 kernel/bpf/queue_stack_maps.c | 288
 kernel/bpf/stackmap.c         |   2
 kernel/bpf/syscall.c          |  91
 kernel/bpf/verifier.c         |  85
 8 files changed, 500 insertions(+), 20 deletions(-)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index ff8262626b8f..4c2fa3ac56f6 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..9425c2fb872f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 {
 	unsigned int offset = skb->data - skb_network_header(skb);
 	struct sock *save_sk;
+	void *saved_data_end;
 	struct cgroup *cgrp;
 	int ret;
 
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	save_sk = skb->sk;
 	skb->sk = sk;
 	__skb_push(skb, offset);
+
+	/* compute pointers for the bpf prog */
+	bpf_compute_and_save_data_end(skb, &saved_data_end);
+
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 				 bpf_prog_run_save_cb);
+	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
 	return ret == 1 ? 0 : -EPERM;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index defcf4df6d91..7c7eeea8cffc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
+const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
+const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6502115e8f55..ab0d5e3f9892 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
 };
 
+BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
+{
+	return map->ops->map_push_elem(map, value, flags);
+}
+
+const struct bpf_func_proto bpf_map_push_elem_proto = {
+	.func		= bpf_map_push_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
+{
+	return map->ops->map_pop_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_pop_elem_proto = {
+	.func		= bpf_map_pop_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
+BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
+{
+	return map->ops->map_peek_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_peek_elem_proto = {
+	.func		= bpf_map_pop_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 	.func		= bpf_user_rnd_u32,
 	.gpl_only	= false,
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
new file mode 100644
index 000000000000..12a93fb37449
--- /dev/null
+++ b/kernel/bpf/queue_stack_maps.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queue_stack_maps.c: BPF queue and stack maps
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "percpu_freelist.h"
+
+#define QUEUE_STACK_CREATE_FLAG_MASK \
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+
+struct bpf_queue_stack {
+	struct bpf_map map;
+	raw_spinlock_t lock;
+	u32 head, tail;
+	u32 size; /* max_entries + 1 */
+
+	char elements[0] __aligned(8);
+};
+
+static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
+{
+	return container_of(map, struct bpf_queue_stack, map);
+}
+
+static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
+{
+	return qs->head == qs->tail;
+}
+
+static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
+{
+	u32 head = qs->head + 1;
+
+	if (unlikely(head >= qs->size))
+		head = 0;
+
+	return head == qs->tail;
+}
+
+/* Called from syscall */
+static int queue_stack_map_alloc_check(union bpf_attr *attr)
+{
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 0 ||
+	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+		return -EINVAL;
+
+	if (attr->value_size > KMALLOC_MAX_SIZE)
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return -E2BIG;
+
+	return 0;
+}
+
+static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
+{
+	int ret, numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_queue_stack *qs;
+	u32 size, value_size;
+	u64 queue_size, cost;
+
+	size = attr->max_entries + 1;
+	value_size = attr->value_size;
+
+	queue_size = sizeof(*qs) + (u64) value_size * size;
+
+	cost = queue_size;
+	if (cost >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	ret = bpf_map_precharge_memlock(cost);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	qs = bpf_map_area_alloc(queue_size, numa_node);
+	if (!qs)
+		return ERR_PTR(-ENOMEM);
+
+	memset(qs, 0, sizeof(*qs));
+
+	bpf_map_init_from_attr(&qs->map, attr);
+
+	qs->map.pages = cost;
+	qs->size = size;
+
+	raw_spin_lock_init(&qs->lock);
+
+	return &qs->map;
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void queue_stack_map_free(struct bpf_map *map)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete
+	 */
+	synchronize_rcu();
+
+	bpf_map_area_free(qs);
+}
+
+static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long flags;
+	int err = 0;
+	void *ptr;
+
+	raw_spin_lock_irqsave(&qs->lock, flags);
+
+	if (queue_stack_map_is_empty(qs)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	ptr = &qs->elements[qs->tail * qs->map.value_size];
+	memcpy(value, ptr, qs->map.value_size);
+
+	if (delete) {
+		if (unlikely(++qs->tail >= qs->size))
+			qs->tail = 0;
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	return err;
+}
+
+
+static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long flags;
+	int err = 0;
+	void *ptr;
+	u32 index;
+
+	raw_spin_lock_irqsave(&qs->lock, flags);
+
+	if (queue_stack_map_is_empty(qs)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	index = qs->head - 1;
+	if (unlikely(index >= qs->size))
+		index = qs->size - 1;
+
+	ptr = &qs->elements[index * qs->map.value_size];
+	memcpy(value, ptr, qs->map.value_size);
+
+	if (delete)
+		qs->head = index;
+
+out:
+	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	return err;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_peek_elem(struct bpf_map *map, void *value)
+{
+	return __queue_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_peek_elem(struct bpf_map *map, void *value)
+{
+	return __stack_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_pop_elem(struct bpf_map *map, void *value)
+{
+	return __queue_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_pop_elem(struct bpf_map *map, void *value)
+{
+	return __stack_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
+				     u64 flags)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long irq_flags;
+	int err = 0;
+	void *dst;
+
+	/* BPF_EXIST is used to force making room for a new element in case the
+	 * map is full
+	 */
+	bool replace = (flags & BPF_EXIST);
+
+	/* Check supported flags for queue and stack maps */
+	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+
+	if (queue_stack_map_is_full(qs)) {
+		if (!replace) {
+			err = -E2BIG;
+			goto out;
+		}
+		/* advance tail pointer to overwrite oldest element */
+		if (unlikely(++qs->tail >= qs->size))
+			qs->tail = 0;
+	}
+
+	dst = &qs->elements[qs->head * qs->map.value_size];
+	memcpy(dst, value, qs->map.value_size);
+
+	if (unlikely(++qs->head >= qs->size))
+		qs->head = 0;
+
+out:
+	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	return err;
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 flags)
+{
+	return -EINVAL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+/* Called from syscall */
+static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
+					void *next_key)
+{
+	return -EINVAL;
+}
+
+const struct bpf_map_ops queue_map_ops = {
+	.map_alloc_check = queue_stack_map_alloc_check,
+	.map_alloc = queue_stack_map_alloc,
+	.map_free = queue_stack_map_free,
+	.map_lookup_elem = queue_stack_map_lookup_elem,
+	.map_update_elem = queue_stack_map_update_elem,
+	.map_delete_elem = queue_stack_map_delete_elem,
+	.map_push_elem = queue_stack_map_push_elem,
+	.map_pop_elem = queue_map_pop_elem,
+	.map_peek_elem = queue_map_peek_elem,
+	.map_get_next_key = queue_stack_map_get_next_key,
+};
+
+const struct bpf_map_ops stack_map_ops = {
+	.map_alloc_check = queue_stack_map_alloc_check,
+	.map_alloc = queue_stack_map_alloc,
+	.map_free = queue_stack_map_free,
+	.map_lookup_elem = queue_stack_map_lookup_elem,
+	.map_update_elem = queue_stack_map_update_elem,
+	.map_delete_elem = queue_stack_map_delete_elem,
+	.map_push_elem = queue_stack_map_push_elem,
+	.map_pop_elem = stack_map_pop_elem,
+	.map_peek_elem = stack_map_peek_elem,
+	.map_get_next_key = queue_stack_map_get_next_key,
+};
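A note on the indexing in the new file (not part of the kernel sources): the map allocates max_entries + 1 slots and deliberately keeps one slot unused, so head == tail means empty while advancing head onto tail means full. A plain C sketch of the same ring invariant, for illustration only:

	/* Illustrative ring-buffer invariant used by queue_stack_maps.c above;
	 * standalone sketch, not kernel code.
	 */
	#include <stdbool.h>

	#define MAX_ENTRIES	4
	#define RING_SIZE	(MAX_ENTRIES + 1)	/* one slot stays empty */

	struct ring { unsigned int head, tail; };

	static bool ring_empty(const struct ring *r)
	{
		return r->head == r->tail;
	}

	static bool ring_full(const struct ring *r)
	{
		return (r->head + 1) % RING_SIZE == r->tail;
	}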
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b2ade10f7ec3..90daf285de03 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
 	put_callchain_buffers();
 }
 
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
 	.map_alloc = stack_map_alloc,
 	.map_free = stack_map_free,
 	.map_get_next_key = stack_map_get_next_key,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f4ecd6ed2252..ccb93277aae2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 	return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+	if (key_size)
+		return memdup_user(ukey, key_size);
+
+	if (ukey)
+		return ERR_PTR(-EINVAL);
+
+	return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	key = memdup_user(ukey, map->key_size);
+	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
@@ -716,6 +727,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+		   map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_peek_elem(map, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -785,7 +799,7 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	key = memdup_user(ukey, map->key_size);
+	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
@@ -846,6 +860,9 @@ static int map_update_elem(union bpf_attr *attr)
 		/* rcu_read_lock() is not needed */
 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
 							 attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+		   map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_push_elem(map, value, attr->flags);
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -888,7 +905,7 @@ static int map_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	key = memdup_user(ukey, map->key_size);
+	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
@@ -941,7 +958,7 @@ static int map_get_next_key(union bpf_attr *attr)
 	}
 
 	if (ukey) {
-		key = memdup_user(ukey, map->key_size);
+		key = __bpf_copy_key(ukey, map->key_size);
 		if (IS_ERR(key)) {
 			err = PTR_ERR(key);
 			goto err_put;
@@ -982,6 +999,69 @@ err_put:
 	return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_user_ptr(attr->key);
+	void __user *uvalue = u64_to_user_ptr(attr->value);
+	int ufd = attr->map_fd;
+	struct bpf_map *map;
+	void *key, *value;
+	u32 value_size;
+	struct fd f;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+		return -EINVAL;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+		err = -EPERM;
+		goto err_put;
+	}
+
+	key = __bpf_copy_key(ukey, map->key_size);
+	if (IS_ERR(key)) {
+		err = PTR_ERR(key);
+		goto err_put;
+	}
+
+	value_size = map->value_size;
+
+	err = -ENOMEM;
+	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	if (!value)
+		goto free_key;
+
+	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+	    map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_pop_elem(map, value);
+	} else {
+		err = -ENOTSUPP;
+	}
+
+	if (err)
+		goto free_value;
+
+	if (copy_to_user(uvalue, value, value_size) != 0)
+		goto free_value;
+
+	err = 0;
+
+free_value:
+	kfree(value);
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
 	[_id] = & _name ## _prog_ops,
@@ -2455,6 +2535,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_TASK_FD_QUERY:
 		err = bpf_task_fd_query(&attr, uattr);
 		break;
+	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+		err = map_lookup_and_delete_elem(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3f93a548a642..98fa0be35370 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1528,14 +1528,19 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
 	return reg->type != SCALAR_VALUE;
 }
 
+static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
+{
+	return cur_regs(env) + regno;
+}
+
 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 {
-	return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
+	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
 }
 
 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
-	const struct bpf_reg_state *reg = cur_regs(env) + regno;
+	const struct bpf_reg_state *reg = reg_state(env, regno);
 
 	return reg->type == PTR_TO_CTX ||
 	       reg->type == PTR_TO_SOCKET;
@@ -1543,11 +1548,19 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
 {
-	const struct bpf_reg_state *reg = cur_regs(env) + regno;
+	const struct bpf_reg_state *reg = reg_state(env, regno);
 
 	return type_is_pkt_pointer(reg->type);
 }
 
+static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = reg_state(env, regno);
+
+	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
+	return reg->type == PTR_TO_FLOW_KEYS;
+}
+
 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 				   const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
@@ -1956,9 +1969,11 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 	}
 
 	if (is_ctx_reg(env, insn->dst_reg) ||
-	    is_pkt_reg(env, insn->dst_reg)) {
+	    is_pkt_reg(env, insn->dst_reg) ||
+	    is_flow_key_reg(env, insn->dst_reg)) {
 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
-			insn->dst_reg, reg_type_str[insn->dst_reg]);
+			insn->dst_reg,
+			reg_type_str[reg_state(env, insn->dst_reg)->type]);
 		return -EACCES;
 	}
 
@@ -1983,7 +1998,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 				int access_size, bool zero_size_allowed,
 				struct bpf_call_arg_meta *meta)
 {
-	struct bpf_reg_state *reg = cur_regs(env) + regno;
+	struct bpf_reg_state *reg = reg_state(env, regno);
 	struct bpf_func_state *state = func(env, reg);
 	int off, i, slot, spi;
 
@@ -2062,8 +2077,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	case PTR_TO_PACKET_META:
 		return check_packet_access(env, regno, reg->off, access_size,
 					   zero_size_allowed);
-	case PTR_TO_FLOW_KEYS:
-		return check_flow_keys_access(env, reg->off, access_size);
 	case PTR_TO_MAP_VALUE:
 		return check_map_access(env, regno, reg->off, access_size,
 					zero_size_allowed);
@@ -2117,7 +2130,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 	}
 
 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
-	    arg_type == ARG_PTR_TO_MAP_VALUE) {
+	    arg_type == ARG_PTR_TO_MAP_VALUE ||
+	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
 		expected_type = PTR_TO_STACK;
 		if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
 		    type != expected_type)
@@ -2187,7 +2201,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->key_size, false,
 					      NULL);
-	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
 		 * check [value, value + map->value_size) validity
 		 */
@@ -2196,9 +2211,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			verbose(env, "invalid map_ptr to access map->value\n");
 			return -EACCES;
 		}
+		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->value_size, false,
-					      NULL);
+					      meta);
 	} else if (arg_type_is_mem_size(arg_type)) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
@@ -2321,6 +2337,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (func_id != BPF_FUNC_sk_select_reuseport)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_QUEUE:
+	case BPF_MAP_TYPE_STACK:
+		if (func_id != BPF_FUNC_map_peek_elem &&
+		    func_id != BPF_FUNC_map_pop_elem &&
+		    func_id != BPF_FUNC_map_push_elem)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2377,6 +2400,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
 			goto error;
 		break;
+	case BPF_FUNC_map_peek_elem:
+	case BPF_FUNC_map_pop_elem:
+	case BPF_FUNC_map_push_elem:
+		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+		    map->map_type != BPF_MAP_TYPE_STACK)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2672,7 +2702,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	if (func_id != BPF_FUNC_tail_call &&
 	    func_id != BPF_FUNC_map_lookup_elem &&
 	    func_id != BPF_FUNC_map_update_elem &&
-	    func_id != BPF_FUNC_map_delete_elem)
+	    func_id != BPF_FUNC_map_delete_elem &&
+	    func_id != BPF_FUNC_map_push_elem &&
+	    func_id != BPF_FUNC_map_pop_elem &&
+	    func_id != BPF_FUNC_map_peek_elem)
 		return 0;
 
 	if (meta->map_ptr == NULL) {
@@ -5244,7 +5277,8 @@ static int do_check(struct bpf_verifier_env *env)
 
 		if (is_ctx_reg(env, insn->dst_reg)) {
 			verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
-				insn->dst_reg, reg_type_str[insn->dst_reg]);
+				insn->dst_reg,
+				reg_type_str[reg_state(env, insn->dst_reg)->type]);
 			return -EACCES;
 		}
 
@@ -6144,7 +6178,10 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
 		     insn->imm == BPF_FUNC_map_update_elem ||
-		     insn->imm == BPF_FUNC_map_delete_elem)) {
+		     insn->imm == BPF_FUNC_map_delete_elem ||
+		     insn->imm == BPF_FUNC_map_push_elem ||
+		     insn->imm == BPF_FUNC_map_pop_elem ||
+		     insn->imm == BPF_FUNC_map_peek_elem)) {
 			aux = &env->insn_aux_data[i + delta];
 			if (bpf_map_ptr_poisoned(aux))
 				goto patch_call_imm;
@@ -6177,6 +6214,14 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
 				     (int (*)(struct bpf_map *map, void *key, void *value,
 					      u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
+				     (int (*)(struct bpf_map *map, void *value,
+					      u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
+				     (int (*)(struct bpf_map *map, void *value))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
+				     (int (*)(struct bpf_map *map, void *value))NULL));
+
 			switch (insn->imm) {
 			case BPF_FUNC_map_lookup_elem:
 				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
@@ -6190,6 +6235,18 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
 					    __bpf_call_base;
 				continue;
+			case BPF_FUNC_map_push_elem:
+				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
+					    __bpf_call_base;
+				continue;
+			case BPF_FUNC_map_pop_elem:
+				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
+					    __bpf_call_base;
+				continue;
+			case BPF_FUNC_map_peek_elem:
+				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
+					    __bpf_call_base;
+				continue;
 			}
 
 			goto patch_call_imm;