 include/linux/bpf.h      |   2 ++
 include/uapi/linux/bpf.h |   1 +
 kernel/bpf/hashtab.c     | 121 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c     |   8 +++-
 kernel/bpf/verifier.c    |   4 +++-
 5 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f3cdf9b15e8..2ae39a3e9ead 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -277,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
 void bpf_fd_array_map_clear(struct bpf_map *map);
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, u64 map_flags);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1701ec1e7de3..ce6f029ac368 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LRU_PERCPU_HASH,
 	BPF_MAP_TYPE_LPM_TRIE,
 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	BPF_MAP_TYPE_HASH_OF_MAPS,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 000153acb6d5..343fb5394c95 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -16,6 +16,7 @@
 #include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
+#include "map_in_map.h"
 
 struct bucket {
 	struct hlist_nulls_head head;
@@ -88,6 +89,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size
 	return *(void __percpu **)(l->key + key_size);
 }
 
+static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
+{
+	return *(void **)(l->key + roundup(map->key_size, 8));
+}
+
 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
 {
 	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
@@ -603,6 +609,14 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	struct bpf_map *map = &htab->map;
+
+	if (map->ops->map_fd_put_ptr) {
+		void *ptr = fd_htab_map_get_ptr(map, l);
+
+		map->ops->map_fd_put_ptr(ptr);
+	}
+
 	if (l->state == HTAB_EXTRA_ELEM_USED) {
 		l->state = HTAB_EXTRA_ELEM_FREE;
 		return;
@@ -1057,6 +1071,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 		}
 	}
 }
+
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void htab_map_free(struct bpf_map *map)
 {
@@ -1213,12 +1228,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = {
 	.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
 };
 
+static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map;
+
+	if (attr->value_size != sizeof(u32))
+		return ERR_PTR(-EINVAL);
+
+	/* pointer is stored internally */
+	attr->value_size = sizeof(void *);
+	map = htab_map_alloc(attr);
+	attr->value_size = sizeof(u32);
+
+	return map;
+}
+
+static void fd_htab_map_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_node *n;
+	struct hlist_nulls_head *head;
+	struct htab_elem *l;
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		head = select_bucket(htab, i);
+
+		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+			void *ptr = fd_htab_map_get_ptr(map, l);
+
+			map->ops->map_fd_put_ptr(ptr);
+		}
+	}
+
+	htab_map_free(map);
+}
+
+/* only called from syscall */
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, u64 map_flags)
+{
+	void *ptr;
+	int ret;
+	u32 ufd = *(u32 *)value;
+
+	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	ret = htab_map_update_elem(map, key, &ptr, map_flags);
+	if (ret)
+		map->ops->map_fd_put_ptr(ptr);
+
+	return ret;
+}
+
+static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map, *inner_map_meta;
+
+	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+	if (IS_ERR(inner_map_meta))
+		return inner_map_meta;
+
+	map = fd_htab_map_alloc(attr);
+	if (IS_ERR(map)) {
+		bpf_map_meta_free(inner_map_meta);
+		return map;
+	}
+
+	map->inner_map_meta = inner_map_meta;
+
+	return map;
+}
+
+static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
+
+	if (!inner_map)
+		return NULL;
+
+	return READ_ONCE(*inner_map);
+}
+
+static void htab_of_map_free(struct bpf_map *map)
+{
+	bpf_map_meta_free(map->inner_map_meta);
+	fd_htab_map_free(map);
+}
+
+static const struct bpf_map_ops htab_of_map_ops = {
+	.map_alloc = htab_of_map_alloc,
+	.map_free = htab_of_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_of_map_lookup_elem,
+	.map_delete_elem = htab_map_delete_elem,
+	.map_fd_get_ptr = bpf_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+};
+
+static struct bpf_map_type_list htab_of_map_type __ro_after_init = {
+	.ops = &htab_of_map_ops,
+	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
 static int __init register_htab_map(void)
 {
 	bpf_register_map_type(&htab_type);
 	bpf_register_map_type(&htab_percpu_type);
 	bpf_register_map_type(&htab_lru_type);
 	bpf_register_map_type(&htab_lru_percpu_type);
+	bpf_register_map_type(&htab_of_map_type);
 	return 0;
 }
 late_initcall(register_htab_map);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6e24fdf1f373..c35ebfe6d84d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -352,7 +352,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_percpu_array_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		err = bpf_stackmap_copy(map, key, value);
-	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
+	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 		err = -ENOTSUPP;
 	} else {
 		rcu_read_lock();
@@ -446,6 +447,11 @@ static int map_update_elem(union bpf_attr *attr)
 		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
 						   attr->flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+		rcu_read_lock();
+		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+						  attr->flags);
+		rcu_read_unlock();
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3b8f528c5473..09923cc5c7c7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1200,6 +1200,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
 		if (func_id != BPF_FUNC_map_lookup_elem)
 			goto error;
 	default:
@@ -3044,7 +3045,8 @@ process_bpf_exit:
 static int check_map_prealloc(struct bpf_map *map)
 {
 	return (map->map_type != BPF_MAP_TYPE_HASH &&
-		map->map_type != BPF_MAP_TYPE_PERCPU_HASH) ||
+		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
 	       !(map->map_flags & BPF_F_NO_PREALLOC);
 }
 