author    Toke Høiland-Jørgensen <toke@redhat.com>	2019-07-26 12:06:55 -0400
committer Alexei Starovoitov <ast@kernel.org>	2019-07-29 16:50:48 -0400
commit    6f9d451ab1a33728adb72d7ff66a7b374d665176
tree      770905d84a709ad9407b82d445d4a88343f3c9c6 /kernel/bpf/devmap.c
parent    fca16e51078e8e5c0af839426b3d2dcd2bede135
xdp: Add devmap_hash map type for looking up devices by hashed index
A common pattern when using xdp_redirect_map() is to create a device map where the lookup key is simply ifindex. Because device maps are arrays, this leaves holes in the map, and the map has to be sized to fit the largest ifindex, regardless of how many devices are actually needed in the map.

This patch adds a second type of device map where the key is looked up using a hashmap, instead of being used as an array index. This allows maps to be densely packed, so they can be smaller.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
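For illustration (not part of this commit), the sketch below shows how an XDP program might use the new map type. It assumes the BPF_MAP_TYPE_DEVMAP_HASH UAPI constant added elsewhere in this series, the legacy libbpf struct bpf_map_def / SEC("maps") definition style, and a userspace loader that populates the map with target ifindex values under arbitrary u32 keys; names like tx_devs are hypothetical.

/* Illustrative sketch only -- not part of this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct bpf_map_def SEC("maps") tx_devs = {
	.type        = BPF_MAP_TYPE_DEVMAP_HASH,
	.key_size    = sizeof(__u32),	/* arbitrary key, e.g. an ifindex */
	.value_size  = sizeof(__u32),	/* ifindex of the target device */
	.max_entries = 8,		/* sized to the devices in use, not the largest ifindex */
};

SEC("xdp")
int xdp_redirect_hash(struct xdp_md *ctx)
{
	__u32 key = ctx->ingress_ifindex;

	/* Redirect to whatever device userspace stored under this key.
	 * Flags must be 0 at the time of this patch.
	 */
	return bpf_redirect_map(&tx_devs, key, 0);
}

char _license[] SEC("license") = "GPL";

Because lookups go through a hashmap, max_entries only needs to cover the number of devices actually inserted rather than the highest ifindex on the system.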
Diffstat (limited to 'kernel/bpf/devmap.c')
 kernel/bpf/devmap.c | 200
 1 file changed, 200 insertions(+), 0 deletions(-)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a0501266bdb8..9af048a932b5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -37,6 +37,12 @@
  * notifier hook walks the map we know that new dev references can not be
  * added by the user because core infrastructure ensures dev_get_by_index()
  * calls will fail at this point.
+ *
+ * The devmap_hash type is a map type which interprets keys as ifindexes and
+ * indexes these using a hashmap. This allows maps that use ifindex as key to be
+ * densely packed instead of having holes in the lookup array for unused
+ * ifindexes. The setup and packet enqueue/send code is shared between the two
+ * types of devmap; only the lookup and insertion is different.
  */
 #include <linux/bpf.h>
 #include <net/xdp.h>
@@ -59,6 +65,7 @@ struct xdp_bulk_queue {
 
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
+	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
 	struct xdp_bulk_queue __percpu *bulkq;
 	struct rcu_head rcu;
@@ -70,11 +77,30 @@ struct bpf_dtab {
 	struct bpf_dtab_netdev **netdev_map;
 	struct list_head __percpu *flush_list;
 	struct list_head list;
+
+	/* these are only used for DEVMAP_HASH type maps */
+	struct hlist_head *dev_index_head;
+	spinlock_t index_lock;
+	unsigned int items;
+	u32 n_buckets;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
+static struct hlist_head *dev_map_create_hash(unsigned int entries)
+{
+	int i;
+	struct hlist_head *hash;
+
+	hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
+	if (hash != NULL)
+		for (i = 0; i < entries; i++)
+			INIT_HLIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
 	int err, cpu;
@@ -97,6 +123,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += sizeof(struct list_head) * num_possible_cpus();
 
+	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
+
+		if (!dtab->n_buckets) /* Overflow check */
+			return -EINVAL;
+		cost += sizeof(struct hlist_head) * dtab->n_buckets;
+	}
+
 	/* if map size is larger than memlock limit, reject it */
 	err = bpf_map_charge_init(&dtab->map.memory, cost);
 	if (err)
@@ -115,8 +149,18 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	if (!dtab->netdev_map)
 		goto free_percpu;
 
+	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
+		if (!dtab->dev_index_head)
+			goto free_map_area;
+
+		spin_lock_init(&dtab->index_lock);
+	}
+
 	return 0;
 
+free_map_area:
+	bpf_map_area_free(dtab->netdev_map);
 free_percpu:
 	free_percpu(dtab->flush_list);
 free_charge:
@@ -198,6 +242,7 @@ static void dev_map_free(struct bpf_map *map)
 
 	free_percpu(dtab->flush_list);
 	bpf_map_area_free(dtab->netdev_map);
+	kfree(dtab->dev_index_head);
 	kfree(dtab);
 }
 
@@ -218,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
+						    int idx)
+{
+	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
+}
+
+struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct hlist_head *head = dev_map_index_hash(dtab, key);
+	struct bpf_dtab_netdev *dev;
+
+	hlist_for_each_entry_rcu(dev, head, index_hlist)
+		if (dev->idx == key)
+			return dev;
+
+	return NULL;
+}
+
+static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
+				    void *next_key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	u32 idx, *next = next_key;
+	struct bpf_dtab_netdev *dev, *next_dev;
+	struct hlist_head *head;
+	int i = 0;
+
+	if (!key)
+		goto find_first;
+
+	idx = *(u32 *)key;
+
+	dev = __dev_map_hash_lookup_elem(map, idx);
+	if (!dev)
+		goto find_first;
+
+	next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
+				    struct bpf_dtab_netdev, index_hlist);
+
+	if (next_dev) {
+		*next = next_dev->idx;
+		return 0;
+	}
+
+	i = idx & (dtab->n_buckets - 1);
+	i++;
+
+ find_first:
+	for (; i < dtab->n_buckets; i++) {
+		head = dev_map_index_hash(dtab, i);
+
+		next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					    struct bpf_dtab_netdev,
+					    index_hlist);
+		if (next_dev) {
+			*next = next_dev->idx;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
 static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
 		       bool in_napi_ctx)
 {
@@ -373,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 	return dev ? &dev->ifindex : NULL;
 }
 
+static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
+								 *(u32 *)key);
+	struct net_device *dev = obj ? obj->dev : NULL;
+
+	return dev ? &dev->ifindex : NULL;
+}
+
 static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
 	if (dev->dev->netdev_ops->ndo_xdp_xmit) {
@@ -422,6 +540,28 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
+static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *old_dev;
+	int k = *(u32 *)key;
+	unsigned long flags;
+	int ret = -ENOENT;
+
+	spin_lock_irqsave(&dtab->index_lock, flags);
+
+	old_dev = __dev_map_hash_lookup_elem(map, k);
+	if (old_dev) {
+		dtab->items--;
+		hlist_del_init_rcu(&old_dev->index_hlist);
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&dtab->index_lock, flags);
+
+	return ret;
+}
+
 static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    struct bpf_dtab *dtab,
 						    u32 ifindex,
@@ -502,6 +642,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 					 map, key, value, map_flags);
 }
 
+static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
+				     void *key, void *value, u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *dev, *old_dev;
+	u32 ifindex = *(u32 *)value;
+	u32 idx = *(u32 *)key;
+	unsigned long flags;
+
+	if (unlikely(map_flags > BPF_EXIST || !ifindex))
+		return -EINVAL;
+
+	old_dev = __dev_map_hash_lookup_elem(map, idx);
+	if (old_dev && (map_flags & BPF_NOEXIST))
+		return -EEXIST;
+
+	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	spin_lock_irqsave(&dtab->index_lock, flags);
+
+	if (old_dev) {
+		hlist_del_rcu(&old_dev->index_hlist);
+	} else {
+		if (dtab->items >= dtab->map.max_entries) {
+			spin_unlock_irqrestore(&dtab->index_lock, flags);
+			call_rcu(&dev->rcu, __dev_map_entry_free);
+			return -E2BIG;
+		}
+		dtab->items++;
+	}
+
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_map_index_hash(dtab, idx));
+	spin_unlock_irqrestore(&dtab->index_lock, flags);
+
+	if (old_dev)
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+
+	return 0;
+}
+
+static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
+				   u64 map_flags)
+{
+	return __dev_map_hash_update_elem(current->nsproxy->net_ns,
+					 map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
 	.map_alloc = dev_map_alloc,
 	.map_free = dev_map_free,
@@ -512,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_check_btf = map_check_no_btf,
 };
 
+const struct bpf_map_ops dev_map_hash_ops = {
+	.map_alloc = dev_map_alloc,
+	.map_free = dev_map_free,
+	.map_get_next_key = dev_map_hash_get_next_key,
+	.map_lookup_elem = dev_map_hash_lookup_elem,
+	.map_update_elem = dev_map_hash_update_elem,
+	.map_delete_elem = dev_map_hash_delete_elem,
+	.map_check_btf = map_check_no_btf,
+};
+
 static int dev_map_notification(struct notifier_block *notifier,
 				ulong event, void *ptr)
 {