 include/linux/bpf.h      |   2 +
 include/uapi/linux/bpf.h |   1 +
 kernel/bpf/hashtab.c     | 121 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c     |   8 +++++++-
 kernel/bpf/verifier.c    |   4 +++-
 5 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f3cdf9b15e8..2ae39a3e9ead 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -277,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags);
 void bpf_fd_array_map_clear(struct bpf_map *map);
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, u64 map_flags);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1701ec1e7de3..ce6f029ac368 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_map_type {
	BPF_MAP_TYPE_LRU_PERCPU_HASH,
	BPF_MAP_TYPE_LPM_TRIE,
	BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	BPF_MAP_TYPE_HASH_OF_MAPS,
 };
 
 enum bpf_prog_type {
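
Note (illustration, not part of the patch): with the new map type in the uapi enum, user space can create a hash-of-maps map through the plain bpf(2) syscall. The sketch below is a minimal example; the sys_bpf() and create_hash_of_maps() names are illustrative, the inner_map_fd attribute comes from the earlier map-in-map patch in this series, and error handling is trimmed.

/* Minimal user-space sketch: create an inner hash map that serves as the
 * value template, then an outer BPF_MAP_TYPE_HASH_OF_MAPS map.
 */
#include <linux/bpf.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static int sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int create_hash_of_maps(void)
{
	union bpf_attr attr;
	int inner_fd, outer_fd;

	/* inner map: an ordinary hash map used as the value template */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u64);
	attr.max_entries = 128;
	inner_fd = sys_bpf(BPF_MAP_CREATE, &attr);

	/* outer map: values are inner-map fds, hence value_size == sizeof(u32) */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH_OF_MAPS;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u32);
	attr.max_entries = 16;
	attr.inner_map_fd = inner_fd;	/* records the template for inner maps */
	outer_fd = sys_bpf(BPF_MAP_CREATE, &attr);

	return outer_fd;
}

Any inner map inserted later has to be compatible with the template recorded from inner_map_fd; that check lives in the map_in_map helpers added earlier in the series, not in this diff.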
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 000153acb6d5..343fb5394c95 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -16,6 +16,7 @@
 #include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
+#include "map_in_map.h"
 
 struct bucket {
	struct hlist_nulls_head head;
@@ -88,6 +89,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size
	return *(void __percpu **)(l->key + key_size);
 }
 
+static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
+{
+	return *(void **)(l->key + roundup(map->key_size, 8));
+}
+
 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
 {
	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
@@ -603,6 +609,14 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	struct bpf_map *map = &htab->map;
+
+	if (map->ops->map_fd_put_ptr) {
+		void *ptr = fd_htab_map_get_ptr(map, l);
+
+		map->ops->map_fd_put_ptr(ptr);
+	}
+
	if (l->state == HTAB_EXTRA_ELEM_USED) {
		l->state = HTAB_EXTRA_ELEM_FREE;
		return;
@@ -1057,6 +1071,7 @@ static void delete_all_elements(struct bpf_htab *htab)
		}
	}
 }
+
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void htab_map_free(struct bpf_map *map)
 {
@@ -1213,12 +1228,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = {
	.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
 };
 
+static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map;
+
+	if (attr->value_size != sizeof(u32))
+		return ERR_PTR(-EINVAL);
+
+	/* pointer is stored internally */
+	attr->value_size = sizeof(void *);
+	map = htab_map_alloc(attr);
+	attr->value_size = sizeof(u32);
+
+	return map;
+}
+
+static void fd_htab_map_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_node *n;
+	struct hlist_nulls_head *head;
+	struct htab_elem *l;
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		head = select_bucket(htab, i);
+
+		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+			void *ptr = fd_htab_map_get_ptr(map, l);
+
+			map->ops->map_fd_put_ptr(ptr);
+		}
+	}
+
+	htab_map_free(map);
+}
+
+/* only called from syscall */
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, u64 map_flags)
+{
+	void *ptr;
+	int ret;
+	u32 ufd = *(u32 *)value;
+
+	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	ret = htab_map_update_elem(map, key, &ptr, map_flags);
+	if (ret)
+		map->ops->map_fd_put_ptr(ptr);
+
+	return ret;
+}
+
+static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map, *inner_map_meta;
+
+	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+	if (IS_ERR(inner_map_meta))
+		return inner_map_meta;
+
+	map = fd_htab_map_alloc(attr);
+	if (IS_ERR(map)) {
+		bpf_map_meta_free(inner_map_meta);
+		return map;
+	}
+
+	map->inner_map_meta = inner_map_meta;
+
+	return map;
+}
+
+static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
+
+	if (!inner_map)
+		return NULL;
+
+	return READ_ONCE(*inner_map);
+}
+
+static void htab_of_map_free(struct bpf_map *map)
+{
+	bpf_map_meta_free(map->inner_map_meta);
+	fd_htab_map_free(map);
+}
+
+static const struct bpf_map_ops htab_of_map_ops = {
+	.map_alloc = htab_of_map_alloc,
+	.map_free = htab_of_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_of_map_lookup_elem,
+	.map_delete_elem = htab_map_delete_elem,
+	.map_fd_get_ptr = bpf_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+};
+
+static struct bpf_map_type_list htab_of_map_type __ro_after_init = {
+	.ops = &htab_of_map_ops,
+	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
 static int __init register_htab_map(void)
 {
	bpf_register_map_type(&htab_type);
	bpf_register_map_type(&htab_percpu_type);
	bpf_register_map_type(&htab_lru_type);
	bpf_register_map_type(&htab_lru_percpu_type);
+	bpf_register_map_type(&htab_of_map_type);
	return 0;
 }
 late_initcall(register_htab_map);
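
Note (illustration, not part of the patch): the free_htab_elem() and fd_htab_map_free() changes above are what drop the reference taken on an inner map, so deleting an outer-map entry from user space, or tearing the outer map down, releases the inner map. A minimal sketch of the delete call, reusing the hypothetical sys_bpf() helper from the creation example above:

/* Deleting an outer-map entry lets free_htab_elem() call map_fd_put_ptr()
 * on the stored inner map pointer.  outer_fd and key are assumed to come
 * from the creation and update sketches in these notes.
 */
static int outer_map_delete(int outer_fd, __u32 key)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = outer_fd;
	attr.key = (__u64)(unsigned long)&key;

	return sys_bpf(BPF_MAP_DELETE_ELEM, &attr);
}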
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6e24fdf1f373..c35ebfe6d84d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -352,7 +352,8 @@ static int map_lookup_elem(union bpf_attr *attr)
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
-	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
+	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = -ENOTSUPP;
	} else {
		rcu_read_lock();
@@ -446,6 +447,11 @@ static int map_update_elem(union bpf_attr *attr)
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+		rcu_read_lock();
+		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+						  attr->flags);
+		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
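
Note (illustration, not part of the patch): the new branch above routes BPF_MAP_UPDATE_ELEM for HASH_OF_MAPS maps through bpf_fd_htab_map_update_elem(), so the value passed from user space is an inner map's fd rather than data. A minimal sketch, again reusing the hypothetical sys_bpf() helper from the creation example:

static int outer_map_add(int outer_fd, __u32 key, int inner_map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = outer_fd;
	attr.key = (__u64)(unsigned long)&key;
	/* the 4-byte value is the inner map's fd; the kernel converts it
	 * to a map pointer via map_fd_get_ptr()
	 */
	attr.value = (__u64)(unsigned long)&inner_map_fd;
	attr.flags = BPF_ANY;

	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr);
}

Reading the entry back with BPF_MAP_LOOKUP_ELEM is rejected with -ENOTSUPP by the map_lookup_elem() hunk above; at this point in the series the inner map is only reachable from a BPF program.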
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3b8f528c5473..09923cc5c7c7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1200,6 +1200,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
			goto error;
		break;
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
		if (func_id != BPF_FUNC_map_lookup_elem)
			goto error;
	default:
@@ -3044,7 +3045,8 @@ process_bpf_exit:
 static int check_map_prealloc(struct bpf_map *map)
 {
	return (map->map_type != BPF_MAP_TYPE_HASH &&
-		map->map_type != BPF_MAP_TYPE_PERCPU_HASH) ||
+		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
		!(map->map_flags & BPF_F_NO_PREALLOC);
 }
 
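
Note (illustration, not part of the patch): on the program side, the verifier change above only permits bpf_map_lookup_elem() on a HASH_OF_MAPS map, and its return value is the inner map itself, while check_map_prealloc() now also covers the new type. The sketch below shows the resulting two-level lookup in the style of the kernel's samples/bpf programs; SEC(), struct bpf_map_def and the helper declarations are assumed to come from samples/bpf/bpf_helpers.h, and how the loader wires the inner-map template to the outer map is loader-specific.

#include <uapi/linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") outer_map = {
	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u32),	/* populated with inner-map fds from user space */
	.max_entries = 16,
};

SEC("kprobe/sys_write")
int two_level_lookup(struct pt_regs *ctx)
{
	__u32 outer_key = 0, inner_key = 0;
	void *inner_map;
	__u64 *value;

	/* only bpf_map_lookup_elem() is allowed on the outer map */
	inner_map = bpf_map_lookup_elem(&outer_map, &outer_key);
	if (!inner_map)
		return 0;

	/* the returned pointer is the inner map; look up inside it */
	value = bpf_map_lookup_elem(inner_map, &inner_key);
	if (value)
		__sync_fetch_and_add(value, 1);

	return 0;
}

char _license[] SEC("license") = "GPL";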