author    | Toke Høiland-Jørgensen <toke@redhat.com> | 2019-07-26 12:06:53 -0400
committer | Alexei Starovoitov <ast@kernel.org>      | 2019-07-29 16:50:48 -0400
commit    | fca16e51078e8e5c0af839426b3d2dcd2bede135 (patch)
tree      | 8bf50cb542d34a730874a6bbd4230f5389366b37 /kernel/bpf/devmap.c
parent    | 6dbff13ca8a2ad2fddd904c2e789dd5e59a8644c (diff)
xdp: Refactor devmap allocation code for reuse
The subsequent patch to add a new devmap sub-type can re-use much of the
initialisation and allocation code, so refactor it into separate functions.
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r-- | kernel/bpf/devmap.c | 136
1 file changed, 83 insertions(+), 53 deletions(-)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d83cf8ccc872..a0501266bdb8 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -60,9 +60,9 @@ struct xdp_bulk_queue {
 struct bpf_dtab_netdev {
         struct net_device *dev; /* must be first member, due to tracepoint */
         struct bpf_dtab *dtab;
-        unsigned int bit;
         struct xdp_bulk_queue __percpu *bulkq;
         struct rcu_head rcu;
+        unsigned int idx; /* keep track of map index for tracepoint */
 };
 
 struct bpf_dtab {
@@ -75,28 +75,21 @@ struct bpf_dtab {
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
-static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
-        struct bpf_dtab *dtab;
         int err, cpu;
         u64 cost;
 
-        if (!capable(CAP_NET_ADMIN))
-                return ERR_PTR(-EPERM);
-
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
             attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
-                return ERR_PTR(-EINVAL);
+                return -EINVAL;
 
         /* Lookup returns a pointer straight to dev->ifindex, so make sure the
          * verifier prevents writes from the BPF side
          */
         attr->map_flags |= BPF_F_RDONLY_PROG;
 
-        dtab = kzalloc(sizeof(*dtab), GFP_USER);
-        if (!dtab)
-                return ERR_PTR(-ENOMEM);
 
         bpf_map_init_from_attr(&dtab->map, attr);
 
@@ -107,9 +100,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         /* if map size is larger than memlock limit, reject it */
         err = bpf_map_charge_init(&dtab->map.memory, cost);
         if (err)
-                goto free_dtab;
-
-        err = -ENOMEM;
+                return -EINVAL;
 
         dtab->flush_list = alloc_percpu(struct list_head);
         if (!dtab->flush_list)
@@ -124,19 +115,38 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         if (!dtab->netdev_map)
                 goto free_percpu;
 
-        spin_lock(&dev_map_lock);
-        list_add_tail_rcu(&dtab->list, &dev_map_list);
-        spin_unlock(&dev_map_lock);
-
-        return &dtab->map;
+        return 0;
 
 free_percpu:
         free_percpu(dtab->flush_list);
 free_charge:
         bpf_map_charge_finish(&dtab->map.memory);
-free_dtab:
-        kfree(dtab);
-        return ERR_PTR(err);
+        return -ENOMEM;
+}
+
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+{
+        struct bpf_dtab *dtab;
+        int err;
+
+        if (!capable(CAP_NET_ADMIN))
+                return ERR_PTR(-EPERM);
+
+        dtab = kzalloc(sizeof(*dtab), GFP_USER);
+        if (!dtab)
+                return ERR_PTR(-ENOMEM);
+
+        err = dev_map_init_map(dtab, attr);
+        if (err) {
+                kfree(dtab);
+                return ERR_PTR(err);
+        }
+
+        spin_lock(&dev_map_lock);
+        list_add_tail_rcu(&dtab->list, &dev_map_list);
+        spin_unlock(&dev_map_lock);
+
+        return &dtab->map;
 }
 
 static void dev_map_free(struct bpf_map *map)
@@ -235,7 +245,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
 out:
         bq->count = 0;
 
-        trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
+        trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx,
                               sent, drops, bq->dev_rx, dev, err);
         bq->dev_rx = NULL;
         __list_del_clearprev(&bq->flush_node);
@@ -412,17 +422,52 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
         return 0;
 }
 
-static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
-                               u64 map_flags)
+static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
+                                                    struct bpf_dtab *dtab,
+                                                    u32 ifindex,
+                                                    unsigned int idx)
 {
-        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-        struct net *net = current->nsproxy->net_ns;
         gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+        struct bpf_dtab_netdev *dev;
+        struct xdp_bulk_queue *bq;
+        int cpu;
+
+        dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+        if (!dev)
+                return ERR_PTR(-ENOMEM);
+
+        dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+                                        sizeof(void *), gfp);
+        if (!dev->bulkq) {
+                kfree(dev);
+                return ERR_PTR(-ENOMEM);
+        }
+
+        for_each_possible_cpu(cpu) {
+                bq = per_cpu_ptr(dev->bulkq, cpu);
+                bq->obj = dev;
+        }
+
+        dev->dev = dev_get_by_index(net, ifindex);
+        if (!dev->dev) {
+                free_percpu(dev->bulkq);
+                kfree(dev);
+                return ERR_PTR(-EINVAL);
+        }
+
+        dev->idx = idx;
+        dev->dtab = dtab;
+
+        return dev;
+}
+
+static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
+                                 void *key, void *value, u64 map_flags)
+{
+        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
         struct bpf_dtab_netdev *dev, *old_dev;
         u32 ifindex = *(u32 *)value;
-        struct xdp_bulk_queue *bq;
         u32 i = *(u32 *)key;
-        int cpu;
 
         if (unlikely(map_flags > BPF_EXIST))
                 return -EINVAL;
@@ -434,31 +479,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
         if (!ifindex) {
                 dev = NULL;
         } else {
-                dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
-                if (!dev)
-                        return -ENOMEM;
-
-                dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-                                                sizeof(void *), gfp);
-                if (!dev->bulkq) {
-                        kfree(dev);
-                        return -ENOMEM;
-                }
-
-                for_each_possible_cpu(cpu) {
-                        bq = per_cpu_ptr(dev->bulkq, cpu);
-                        bq->obj = dev;
-                }
-
-                dev->dev = dev_get_by_index(net, ifindex);
-                if (!dev->dev) {
-                        free_percpu(dev->bulkq);
-                        kfree(dev);
-                        return -EINVAL;
-                }
-
-                dev->bit = i;
-                dev->dtab = dtab;
+                dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+                if (IS_ERR(dev))
+                        return PTR_ERR(dev);
         }
 
         /* Use call_rcu() here to ensure rcu critical sections have completed
@@ -472,6 +495,13 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
         return 0;
 }
 
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+                               u64 map_flags)
+{
+        return __dev_map_update_elem(current->nsproxy->net_ns,
+                                     map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
         .map_alloc = dev_map_alloc,
         .map_free = dev_map_free,
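
The point of splitting out dev_map_init_map() and __dev_map_alloc_node() is that a later devmap sub-type can call them from its own map_alloc and update paths instead of duplicating the attribute checks, memlock charging, flush-list setup, and per-entry netdev allocation. As a rough illustration of that reuse (not part of this patch; the dev_map_hash_alloc name and the sub-type-specific placeholder are assumptions about what a follow-up might look like), such an allocator could be written as:

/* Illustrative sketch only -- not part of this patch. A hypothetical second
 * devmap flavour could reuse dev_map_init_map() for the shared allocation
 * and charging logic, keeping only its sub-type specific pieces local.
 */
static struct bpf_map *dev_map_hash_alloc(union bpf_attr *attr)
{
        struct bpf_dtab *dtab;
        int err;

        if (!capable(CAP_NET_ADMIN))
                return ERR_PTR(-EPERM);

        dtab = kzalloc(sizeof(*dtab), GFP_USER);
        if (!dtab)
                return ERR_PTR(-ENOMEM);

        /* shared devmap setup: attribute checks, charging, flush list, map array */
        err = dev_map_init_map(dtab, attr);
        if (err) {
                kfree(dtab);
                return ERR_PTR(err);
        }

        /* sub-type specific state (e.g. a hash table over the entries) would
         * be initialised here before publishing the map
         */

        spin_lock(&dev_map_lock);
        list_add_tail_rcu(&dtab->list, &dev_map_list);
        spin_unlock(&dev_map_lock);

        return &dtab->map;
}

Its update path would similarly call __dev_map_alloc_node() to build the struct bpf_dtab_netdev before linking it into whatever lookup structure the sub-type uses, which is exactly the seam this refactor creates.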