Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/Makefile           |   3
-rw-r--r--  kernel/bpf/arraymap.c         |  28
-rw-r--r--  kernel/bpf/cpumap.c           |   1
-rw-r--r--  kernel/bpf/devmap.c           |   1
-rw-r--r--  kernel/bpf/hashtab.c          |  26
-rw-r--r--  kernel/bpf/inode.c            |  11
-rw-r--r--  kernel/bpf/local_storage.c    |   1
-rw-r--r--  kernel/bpf/lpm_trie.c         |  12
-rw-r--r--  kernel/bpf/reuseport_array.c  | 363
-rw-r--r--  kernel/bpf/sockmap.c          |   2
-rw-r--r--  kernel/bpf/stackmap.c         |   1
-rw-r--r--  kernel/bpf/syscall.c          |  42
-rw-r--r--  kernel/bpf/verifier.c         |   9
-rw-r--r--  kernel/bpf/xskmap.c           |   3
14 files changed, 478 insertions(+), 25 deletions(-)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e8906cbad81f..0488b8258321 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -23,3 +23,6 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
 obj-$(CONFIG_CGROUP_BPF) += cgroup.o
+ifeq ($(CONFIG_INET),y)
+obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
+endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2aa55d030c77..0c17aab3ce5f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -54,7 +54,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
 }
 
 /* Called from syscall */
-static int array_map_alloc_check(union bpf_attr *attr)
+int array_map_alloc_check(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int numa_node = bpf_map_attr_numa_node(attr);
@@ -358,27 +358,20 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
-static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
-			       u32 btf_key_id, u32 btf_value_id)
+static int array_map_check_btf(const struct bpf_map *map,
+			       const struct btf_type *key_type,
+			       const struct btf_type *value_type)
 {
-	const struct btf_type *key_type, *value_type;
-	u32 key_size, value_size;
 	u32 int_data;
 
-	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
-	if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
 		return -EINVAL;
 
 	int_data = *(u32 *)(key_type + 1);
-	/* bpf array can only take a u32 key. This check makes
-	 * sure that the btf matches the attr used during map_create.
+	/* bpf array can only take a u32 key. This check makes sure
+	 * that the btf matches the attr used during map_create.
 	 */
-	if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
-	    BTF_INT_OFFSET(int_data))
-		return -EINVAL;
-
-	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
-	if (!value_type || value_size != map->value_size)
+	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
 		return -EINVAL;
 
 	return 0;
@@ -405,6 +398,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_lookup_elem = percpu_array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
+	.map_check_btf = array_map_check_btf,
 };
 
 static int fd_array_map_alloc_check(union bpf_attr *attr)
@@ -546,6 +540,7 @@ const struct bpf_map_ops prog_array_map_ops = {
 	.map_fd_put_ptr = prog_fd_array_put_ptr,
 	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
 	.map_release_uref = bpf_fd_array_map_clear,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -634,6 +629,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
 	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
 	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
 	.map_release = perf_event_fd_array_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 #ifdef CONFIG_CGROUPS
@@ -665,6 +661,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
 	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
+	.map_check_btf = map_check_no_btf,
 };
 #endif
 
@@ -749,4 +746,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = array_of_map_gen_lookup,
+	.map_check_btf = map_check_no_btf,
 };
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 46f5f29605d4..620bc5024d7d 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -555,6 +555,7 @@ const struct bpf_map_ops cpu_map_ops = {
 	.map_update_elem = cpu_map_update_elem,
 	.map_lookup_elem = cpu_map_lookup_elem,
 	.map_get_next_key = cpu_map_get_next_key,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 750d45edae79..ac1df79f3788 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -488,6 +488,7 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_lookup_elem = dev_map_lookup_elem,
 	.map_update_elem = dev_map_update_elem,
 	.map_delete_elem = dev_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int dev_map_notification(struct notifier_block *notifier,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 513d9dfcf4ee..04b8eda94e7d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -11,9 +11,11 @@
  * General Public License for more details.
  */
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
+#include <uapi/linux/btf.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 #include "map_in_map.h"
@@ -1162,6 +1164,27 @@ static void htab_map_free(struct bpf_map *map)
 	kfree(htab);
 }
 
+static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
+				   struct seq_file *m)
+{
+	void *value;
+
+	rcu_read_lock();
+
+	value = htab_map_lookup_elem(map, key);
+	if (!value) {
+		rcu_read_unlock();
+		return;
+	}
+
+	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
+	seq_puts(m, ": ");
+	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
+	seq_puts(m, "\n");
+
+	rcu_read_unlock();
+}
+
 const struct bpf_map_ops htab_map_ops = {
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
@@ -1171,6 +1194,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_update_elem = htab_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
+	.map_seq_show_elem = htab_map_seq_show_elem,
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
@@ -1182,6 +1206,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_update_elem = htab_lru_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
+	.map_seq_show_elem = htab_map_seq_show_elem,
 };
 
 /* Called from eBPF program */
@@ -1408,4 +1433,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = htab_of_map_gen_lookup,
+	.map_check_btf = map_check_no_btf,
 };
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 76efe9a183f5..2ada5e21dfa6 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -196,19 +196,21 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct bpf_map *map = seq_file_to_map(m);
 	void *key = map_iter(m)->key;
+	void *prev_key;
 
 	if (map_iter(m)->done)
 		return NULL;
 
 	if (unlikely(v == SEQ_START_TOKEN))
-		goto done;
+		prev_key = NULL;
+	else
+		prev_key = key;
 
-	if (map->ops->map_get_next_key(map, key, key)) {
+	if (map->ops->map_get_next_key(map, prev_key, key)) {
 		map_iter(m)->done = true;
 		return NULL;
 	}
 
-done:
 	++(*pos);
 	return key;
 }
@@ -332,7 +334,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 	struct bpf_map *map = arg;
 
 	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
-			     map->btf ? &bpffs_map_fops : &bpffs_obj_fops);
+			     bpf_map_support_seq_show(map) ?
+			     &bpffs_map_fops : &bpffs_obj_fops);
 }
 
 static struct dentry *
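
The fix above passes NULL as the previous key on the first iteration, which is the map_get_next_key() contract: a NULL key yields the first key. Userspace iterates the same way; a minimal sketch using libbpf's bpf_map_get_next_key() (an illustration, not part of this diff):

#include <bpf/bpf.h>

/* Walk all keys of a map with u32 keys. The first call passes a NULL
 * "previous" key to get the first key, exactly as map_seq_next() now
 * does for the bpffs dump. */
static void walk_keys(int map_fd)
{
	__u32 key, next_key;
	void *prev = NULL;

	while (!bpf_map_get_next_key(map_fd, prev, &next_key)) {
		key = next_key;
		prev = &key;	/* later calls pass the last key seen */
		/* ... look up and consume the element for `key` ... */
	}
}
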
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index fc4e37f68f2a..22ad967d1e5f 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -246,6 +246,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 	.map_lookup_elem = cgroup_storage_lookup_elem,
 	.map_update_elem = cgroup_storage_update_elem,
 	.map_delete_elem = cgroup_storage_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1603492c9cc7..9058317ba9de 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -10,11 +10,13 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <net/ipv6.h>
+#include <uapi/linux/btf.h>
 
 /* Intermediate node */
 #define LPM_TREE_NODE_FLAG_IM BIT(0)
@@ -686,6 +688,15 @@ free_stack:
 	return err;
 }
 
+static int trie_check_btf(const struct bpf_map *map,
+			  const struct btf_type *key_type,
+			  const struct btf_type *value_type)
+{
+	/* Keys must have struct bpf_lpm_trie_key embedded. */
+	return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ?
+	       -EINVAL : 0;
+}
+
 const struct bpf_map_ops trie_map_ops = {
 	.map_alloc = trie_alloc,
 	.map_free = trie_free,
@@ -693,4 +704,5 @@ const struct bpf_map_ops trie_map_ops = {
 	.map_lookup_elem = trie_lookup_elem,
 	.map_update_elem = trie_update_elem,
 	.map_delete_elem = trie_delete_elem,
+	.map_check_btf = trie_check_btf,
 };
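
trie_check_btf() only insists that the BTF key is a struct, since a trie key must carry the prefix length up front as in struct bpf_lpm_trie_key. A hypothetical key layout that satisfies the check (illustration only, not from this patch):

#include <linux/types.h>

/* BTF sees this as BTF_KIND_STRUCT, so trie_check_btf() accepts it.
 * It preserves the bpf_lpm_trie_key wire format: __u32 prefixlen
 * first, then the data bytes being matched. */
struct ipv4_lpm_key {
	__u32 prefixlen;	/* 0..32 for IPv4 */
	__u32 addr;		/* address in network byte order */
};
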
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
new file mode 100644
index 000000000000..18e225de80ff
--- /dev/null
+++ b/kernel/bpf/reuseport_array.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Facebook
+ */
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/sock_diag.h>
+#include <net/sock_reuseport.h>
+
+struct reuseport_array {
+	struct bpf_map map;
+	struct sock __rcu *ptrs[];
+};
+
+static struct reuseport_array *reuseport_array(struct bpf_map *map)
+{
+	return (struct reuseport_array *)map;
+}
+
+/* The caller must hold the reuseport_lock */
+void bpf_sk_reuseport_detach(struct sock *sk)
+{
+	struct sock __rcu **socks;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	socks = sk->sk_user_data;
+	if (socks) {
+		WRITE_ONCE(sk->sk_user_data, NULL);
+		/*
+		 * Do not move this NULL assignment outside of
+		 * sk->sk_callback_lock because there is
+		 * a race with reuseport_array_free()
+		 * which does not hold the reuseport_lock.
+		 */
+		RCU_INIT_POINTER(*socks, NULL);
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int reuseport_array_alloc_check(union bpf_attr *attr)
+{
+	if (attr->value_size != sizeof(u32) &&
+	    attr->value_size != sizeof(u64))
+		return -EINVAL;
+
+	return array_map_alloc_check(attr);
+}
+
+static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = *(u32 *)key;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return rcu_dereference(array->ptrs[index]);
+}
+
+/* Called from syscall only */
+static int reuseport_array_delete_elem(struct bpf_map *map, void *key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = *(u32 *)key;
+	struct sock *sk;
+	int err;
+
+	if (index >= map->max_entries)
+		return -E2BIG;
+
+	if (!rcu_access_pointer(array->ptrs[index]))
+		return -ENOENT;
+
+	spin_lock_bh(&reuseport_lock);
+
+	sk = rcu_dereference_protected(array->ptrs[index],
+				       lockdep_is_held(&reuseport_lock));
+	if (sk) {
+		write_lock_bh(&sk->sk_callback_lock);
+		WRITE_ONCE(sk->sk_user_data, NULL);
+		RCU_INIT_POINTER(array->ptrs[index], NULL);
+		write_unlock_bh(&sk->sk_callback_lock);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+
+	spin_unlock_bh(&reuseport_lock);
+
+	return err;
+}
+
+static void reuseport_array_free(struct bpf_map *map)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	struct sock *sk;
+	u32 i;
+
+	synchronize_rcu();
+
+	/*
+	 * ops->map_*_elem() will not be able to access this
+	 * array now. Hence, this function only races with
+	 * bpf_sk_reuseport_detach() which was triggered by
+	 * close() or disconnect().
+	 *
+	 * This function and bpf_sk_reuseport_detach() are
+	 * both removing sk from "array". Who removes it
+	 * first does not matter.
+	 *
+	 * The only concern here is bpf_sk_reuseport_detach()
+	 * may access "array" which is being freed here.
+	 * bpf_sk_reuseport_detach() accesses this "array"
+	 * through sk->sk_user_data _and_ with sk->sk_callback_lock
+	 * held which is enough because this "array" is not freed
+	 * until all sk->sk_user_data has stopped referencing this "array".
+	 *
+	 * Hence, due to the above, taking "reuseport_lock" is not
+	 * needed here.
+	 */
+
+	/*
+	 * Since reuseport_lock is not taken, sk is accessed under
+	 * rcu_read_lock()
+	 */
+	rcu_read_lock();
+	for (i = 0; i < map->max_entries; i++) {
+		sk = rcu_dereference(array->ptrs[i]);
+		if (sk) {
+			write_lock_bh(&sk->sk_callback_lock);
+			/*
+			 * No need for WRITE_ONCE(). At this point,
+			 * no one is reading it without taking the
+			 * sk->sk_callback_lock.
+			 */
+			sk->sk_user_data = NULL;
+			write_unlock_bh(&sk->sk_callback_lock);
+			RCU_INIT_POINTER(array->ptrs[i], NULL);
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Once reaching here, no sk->sk_user_data is referencing
+	 * this "array" any more. "array" can be freed now.
+	 */
+	bpf_map_area_free(array);
+}
+
+static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
+{
+	int err, numa_node = bpf_map_attr_numa_node(attr);
+	struct reuseport_array *array;
+	u64 cost, array_size;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	array_size = sizeof(*array);
+	array_size += (u64)attr->max_entries * sizeof(struct sock *);
+
+	/* make sure there is no u32 overflow later in round_up() */
+	cost = array_size;
+	if (cost >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-ENOMEM);
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	err = bpf_map_precharge_memlock(cost);
+	if (err)
+		return ERR_PTR(err);
+
+	/* allocate all map elements and zero-initialize them */
+	array = bpf_map_area_alloc(array_size, numa_node);
+	if (!array)
+		return ERR_PTR(-ENOMEM);
+
+	/* copy mandatory map attributes */
+	bpf_map_init_from_attr(&array->map, attr);
+	array->map.pages = cost;
+
+	return &array->map;
+}
+
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value)
+{
+	struct sock *sk;
+	int err;
+
+	if (map->value_size != sizeof(u64))
+		return -ENOSPC;
+
+	rcu_read_lock();
+	sk = reuseport_array_lookup_elem(map, key);
+	if (sk) {
+		*(u64 *)value = sock_gen_cookie(sk);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+	rcu_read_unlock();
+
+	return err;
+}
+
+static int
+reuseport_array_update_check(const struct reuseport_array *array,
+			     const struct sock *nsk,
+			     const struct sock *osk,
+			     const struct sock_reuseport *nsk_reuse,
+			     u32 map_flags)
+{
+	if (osk && map_flags == BPF_NOEXIST)
+		return -EEXIST;
+
+	if (!osk && map_flags == BPF_EXIST)
+		return -ENOENT;
+
+	if (nsk->sk_protocol != IPPROTO_UDP && nsk->sk_protocol != IPPROTO_TCP)
+		return -ENOTSUPP;
+
+	if (nsk->sk_family != AF_INET && nsk->sk_family != AF_INET6)
+		return -ENOTSUPP;
+
+	if (nsk->sk_type != SOCK_STREAM && nsk->sk_type != SOCK_DGRAM)
+		return -ENOTSUPP;
+
+	/*
+	 * sk must be hashed (i.e. listening in the TCP case or bound
+	 * in the UDP case) and
+	 * it must also be a SO_REUSEPORT sk (i.e. reuse cannot be NULL).
+	 *
+	 * Also, sk will be used in a bpf helper that is protected by
+	 * rcu_read_lock().
+	 */
+	if (!sock_flag(nsk, SOCK_RCU_FREE) || !sk_hashed(nsk) || !nsk_reuse)
+		return -EINVAL;
+
+	/* READ_ONCE because the sk->sk_callback_lock may not be held here */
+	if (READ_ONCE(nsk->sk_user_data))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Called from syscall only.
+ * The "nsk" is protected by the fd's refcnt.
+ * The "osk" and "reuse" are protected by reuseport_lock.
+ */
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	struct sock *free_osk = NULL, *osk, *nsk;
+	struct sock_reuseport *reuse;
+	u32 index = *(u32 *)key;
+	struct socket *socket;
+	int err, fd;
+
+	if (map_flags > BPF_EXIST)
+		return -EINVAL;
+
+	if (index >= map->max_entries)
+		return -E2BIG;
+
+	if (map->value_size == sizeof(u64)) {
+		u64 fd64 = *(u64 *)value;
+
+		if (fd64 > S32_MAX)
+			return -EINVAL;
+		fd = fd64;
+	} else {
+		fd = *(int *)value;
+	}
+
+	socket = sockfd_lookup(fd, &err);
+	if (!socket)
+		return err;
+
+	nsk = socket->sk;
+	if (!nsk) {
+		err = -EINVAL;
+		goto put_file;
+	}
+
+	/* Quick checks before taking reuseport_lock */
+	err = reuseport_array_update_check(array, nsk,
+					   rcu_access_pointer(array->ptrs[index]),
+					   rcu_access_pointer(nsk->sk_reuseport_cb),
+					   map_flags);
+	if (err)
+		goto put_file;
+
+	spin_lock_bh(&reuseport_lock);
+	/*
+	 * Some of the checks only need reuseport_lock
+	 * but they are done under sk_callback_lock also
+	 * for simplicity.
+	 */
+	write_lock_bh(&nsk->sk_callback_lock);
+
+	osk = rcu_dereference_protected(array->ptrs[index],
+					lockdep_is_held(&reuseport_lock));
+	reuse = rcu_dereference_protected(nsk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	err = reuseport_array_update_check(array, nsk, osk, reuse, map_flags);
+	if (err)
+		goto put_file_unlock;
+
+	/* Ensure reuse->reuseport_id is set */
+	err = reuseport_get_id(reuse);
+	if (err < 0)
+		goto put_file_unlock;
+
+	WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]);
+	rcu_assign_pointer(array->ptrs[index], nsk);
+	free_osk = osk;
+	err = 0;
+
+put_file_unlock:
+	write_unlock_bh(&nsk->sk_callback_lock);
+
+	if (free_osk) {
+		write_lock_bh(&free_osk->sk_callback_lock);
+		WRITE_ONCE(free_osk->sk_user_data, NULL);
+		write_unlock_bh(&free_osk->sk_callback_lock);
+	}
+
+	spin_unlock_bh(&reuseport_lock);
+put_file:
+	fput(socket->file);
+	return err;
+}
+
+/* Called from syscall */
+static int reuseport_array_get_next_key(struct bpf_map *map, void *key,
+					void *next_key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = key ? *(u32 *)key : U32_MAX;
+	u32 *next = (u32 *)next_key;
+
+	if (index >= array->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == array->map.max_entries - 1)
+		return -ENOENT;
+
+	*next = index + 1;
+	return 0;
+}
+
+const struct bpf_map_ops reuseport_array_ops = {
+	.map_alloc_check = reuseport_array_alloc_check,
+	.map_alloc = reuseport_array_alloc,
+	.map_free = reuseport_array_free,
+	.map_lookup_elem = reuseport_array_lookup_elem,
+	.map_get_next_key = reuseport_array_get_next_key,
+	.map_delete_elem = reuseport_array_delete_elem,
+};
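
For context, a hypothetical userspace sequence that exercises the new map type (illustration only, not part of this diff; bpf_create_map() and bpf_map_update_elem() are libbpf wrappers, and the update can fail per the checks in reuseport_array_update_check() above):

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Store a listening SO_REUSEPORT socket in slot 0 of a reuseport
 * sockarray. Creating the map needs CAP_SYS_ADMIN; the update fails
 * unless the socket is hashed (listening/bound) and actually in a
 * reuseport group. */
static int install_reuseport_sock(int listen_fd)
{
	__u32 key = 0;
	__u64 value = listen_fd;	/* value_size == 8: fd passed as u64 */
	int map_fd;

	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
				sizeof(__u32), sizeof(__u64), 1, 0);
	if (map_fd < 0)
		return map_fd;

	return bpf_map_update_elem(map_fd, &key, &value, BPF_ANY);
}
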
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 70c0755e8fc4..0c1a696b041b 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -2498,6 +2498,7 @@ const struct bpf_map_ops sock_map_ops = {
 	.map_update_elem = sock_map_update_elem,
 	.map_delete_elem = sock_map_delete_elem,
 	.map_release_uref = sock_map_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 const struct bpf_map_ops sock_hash_ops = {
@@ -2508,6 +2509,7 @@ const struct bpf_map_ops sock_hash_ops = {
 	.map_update_elem = sock_hash_update_elem,
 	.map_delete_elem = sock_hash_delete_elem,
 	.map_release_uref = sock_map_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b675a3f3d141..8061a439ef18 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -607,6 +607,7 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_lookup_elem = stack_map_lookup_elem,
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int __init stack_map_init(void)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5af4e9e2722d..43727ed0d94a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -103,6 +103,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
 const struct bpf_map_ops bpf_map_offload_ops = {
 	.map_alloc = bpf_map_offload_map_alloc,
 	.map_free = bpf_map_offload_map_free,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -455,6 +456,34 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
+int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type)
+{
+	return -ENOTSUPP;
+}
+
+static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+			 u32 btf_key_id, u32 btf_value_id)
+{
+	const struct btf_type *key_type, *value_type;
+	u32 key_size, value_size;
+	int ret = 0;
+
+	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+	if (!key_type || key_size != map->key_size)
+		return -EINVAL;
+
+	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+	if (!value_type || value_size != map->value_size)
+		return -EINVAL;
+
+	if (map->ops->map_check_btf)
+		ret = map->ops->map_check_btf(map, key_type, value_type);
+
+	return ret;
+}
+
 #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
@@ -489,8 +518,7 @@ static int map_create(union bpf_attr *attr)
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
 
-	if (bpf_map_support_seq_show(map) &&
-	    (attr->btf_key_type_id || attr->btf_value_type_id)) {
+	if (attr->btf_key_type_id || attr->btf_value_type_id) {
 		struct btf *btf;
 
 		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
@@ -504,8 +532,8 @@ static int map_create(union bpf_attr *attr)
 			goto free_map_nouncharge;
 		}
 
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
-					      attr->btf_value_type_id);
+		err = map_check_btf(map, btf, attr->btf_key_type_id,
+				    attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
 			goto free_map_nouncharge;
@@ -684,6 +712,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -790,6 +820,10 @@ static int map_update_elem(union bpf_attr *attr)
 		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 						  attr->flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		/* rcu_read_lock() is not needed */
+		err = bpf_fd_reuseport_array_update_elem(map, key, value,
+							 attr->flags);
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
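
After this split, the generic map_check_btf() above guarantees that the BTF-described key/value sizes match map->key_size and map->value_size, leaving the per-map callback with only type-kind policy. A sketch of a callback for a hypothetical map type that accepts only integer keys, mirroring the pattern this patch establishes (example only, not in the diff):

#include <linux/bpf.h>
#include <linux/btf.h>
#include <uapi/linux/btf.h>

/* Sizes were already validated by the generic map_check_btf(), so the
 * callback only restricts the BTF kind of the key. */
static int example_map_check_btf(const struct bpf_map *map,
				 const struct btf_type *key_type,
				 const struct btf_type *value_type)
{
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;
	return 0;
}
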
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 587468a9c37d..ca90679a7fe5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1310,6 +1310,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_SK_REUSEPORT:
 		/* dst_input() and dst_output() can't write for now */
 		if (t == BPF_WRITE)
 			return false;
@@ -2166,6 +2167,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_msg_redirect_hash)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+		if (func_id != BPF_FUNC_sk_select_reuseport)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2217,6 +2222,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_sk_select_reuseport:
+		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+			goto error;
+		break;
 	default:
 		break;
 	}
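
These hunks tie BPF_MAP_TYPE_REUSEPORT_SOCKARRAY one-to-one to the bpf_sk_select_reuseport() helper, usable from BPF_PROG_TYPE_SK_REUSEPORT programs. A minimal sketch of such a program (illustration only; assumes the selftests' bpf_helpers.h conventions for SEC() and the helper declaration):

#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") reuseport_map = {
	.type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u32),
	.max_entries = 16,
};

SEC("sk_reuseport")
int select_by_hash(struct sk_reuseport_md *reuse_md)
{
	__u32 index = reuse_md->hash % 16;

	/* Steer the incoming packet to the socket stored in slot
	 * `index`; the verifier only allows this helper with a
	 * reuseport sockarray, and vice versa. */
	if (bpf_sk_select_reuseport(reuse_md, &reuseport_map, &index, 0))
		return SK_DROP;
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
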
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index b3c557476a8d..4ddf61e158f6 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -227,6 +227,5 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_lookup_elem = xsk_map_lookup_elem,
 	.map_update_elem = xsk_map_update_elem,
 	.map_delete_elem = xsk_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
-
-