Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--    kernel/bpf/devmap.c    74
1 file changed, 70 insertions, 4 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 06c400e7e4ff..15293b9dfb77 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -55,10 +55,17 @@
 #define DEV_CREATE_FLAG_MASK \
         (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+#define DEV_MAP_BULK_SIZE 16
+struct xdp_bulk_queue {
+        struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+        unsigned int count;
+};
+
 struct bpf_dtab_netdev {
         struct net_device *dev; /* must be first member, due to tracepoint */
         struct bpf_dtab *dtab;
         unsigned int bit;
+        struct xdp_bulk_queue __percpu *bulkq;
         struct rcu_head rcu;
 };
 
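The queue added here is deliberately small: DEV_MAP_BULK_SIZE frame pointers plus a count, and each bpf_dtab_netdev entry gets one instance per CPU through the __percpu bulkq pointer, so the CPU running the NAPI poll can append frames to its own queue without locking. A minimal user-space sketch of that shape, purely as an analogue (C11 thread-local storage standing in for per-CPU data; the names below are illustrative, not from the patch):

/* Illustrative analogue only, not kernel code. */
#include <stdio.h>

#define BULK_SIZE 16                    /* mirrors DEV_MAP_BULK_SIZE */

struct bulk_queue {
        void *q[BULK_SIZE];             /* pending frame pointers */
        unsigned int count;             /* slots currently in use */
};

/* One private queue per thread, so enqueueing needs no lock. */
static _Thread_local struct bulk_queue bq;

int main(void)
{
        bq.count = 0;                   /* each thread sees only its own copy */
        /* Roughly 16 pointers plus a counter, per CPU, per map entry. */
        printf("per-queue footprint: %zu bytes\n", sizeof(bq));
        return 0;
}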
@@ -208,6 +215,34 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
         __set_bit(bit, bitmap);
 }
 
+static int bq_xmit_all(struct bpf_dtab_netdev *obj,
+                       struct xdp_bulk_queue *bq)
+{
+        struct net_device *dev = obj->dev;
+        int i;
+
+        if (unlikely(!bq->count))
+                return 0;
+
+        for (i = 0; i < bq->count; i++) {
+                struct xdp_frame *xdpf = bq->q[i];
+
+                prefetch(xdpf);
+        }
+
+        for (i = 0; i < bq->count; i++) {
+                struct xdp_frame *xdpf = bq->q[i];
+                int err;
+
+                err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+                if (err)
+                        xdp_return_frame(xdpf);
+        }
+        bq->count = 0;
+
+        return 0;
+}
+
 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
  * from the driver before returning from its napi->poll() routine. The poll()
  * routine is called either from busy_poll context or net_rx_action signaled
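bq_xmit_all() walks the queue twice: a first pass that only prefetches each xdp_frame, then a second pass that hands the frames to ndo_xdp_xmit() and returns any frame the driver rejects via xdp_return_frame(). The split-loop prefetch idea can be sketched in plain C with the GCC/Clang __builtin_prefetch intrinsic; the item type and work function below are invented for illustration:

/* Sketch of the prefetch-then-process pattern; not kernel code. */
#include <stddef.h>
#include <stdio.h>

struct item {
        int payload;
};

/* Hypothetical per-item work, standing in for ndo_xdp_xmit(). */
static int process_one(struct item *it)
{
        return it->payload ? 0 : -1;
}

static size_t process_batch(struct item **q, size_t count)
{
        size_t dropped = 0;

        /* Pass 1: start pulling every queued item toward the cache. */
        for (size_t i = 0; i < count; i++)
                __builtin_prefetch(q[i]);

        /* Pass 2: by the time item i is touched, its prefetch has had a
         * chance to hide the cache-miss latency.
         */
        for (size_t i = 0; i < count; i++) {
                if (process_one(q[i]) != 0)
                        dropped++;      /* bq_xmit_all() calls xdp_return_frame() here */
        }
        return dropped;
}

int main(void)
{
        struct item a = { 1 }, b = { 0 };
        struct item *batch[] = { &a, &b };

        printf("dropped %zu of 2\n", process_batch(batch, 2));
        return 0;
}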
@@ -223,6 +258,7 @@ void __dev_map_flush(struct bpf_map *map)
 
         for_each_set_bit(bit, bitmap, map->max_entries) {
                 struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+                struct xdp_bulk_queue *bq;
                 struct net_device *netdev;
 
                 /* This is possible if the dev entry is removed by user space
@@ -232,6 +268,9 @@ void __dev_map_flush(struct bpf_map *map)
                         continue;
 
                 __clear_bit(bit, bitmap);
+
+                bq = this_cpu_ptr(dev->bulkq);
+                bq_xmit_all(dev, bq);
                 netdev = dev->dev;
                 if (likely(netdev->netdev_ops->ndo_xdp_flush))
                         netdev->netdev_ops->ndo_xdp_flush(netdev);
@@ -254,6 +293,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
         return obj;
 }
 
+/* Runs under RCU-read-side, plus in softirq under NAPI protection.
+ * Thus, safe percpu variable access.
+ */
+static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
+{
+        struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+
+        if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
+                bq_xmit_all(obj, bq);
+
+        bq->q[bq->count++] = xdpf;
+        return 0;
+}
+
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
 {
         struct net_device *dev = dst->dev;
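bq_enqueue() never lets the per-CPU queue grow past DEV_MAP_BULK_SIZE: if the queue is already full it transmits everything first, then stores the new frame, and the final partial batch is drained later by __dev_map_flush() at the end of the NAPI poll. The self-contained sketch below reproduces that control flow with a plain array queue; the flush function just counts batches, and all names are illustrative:

/* Sketch of the flush-on-full bulking logic; not kernel code. */
#include <stdio.h>

#define BULK_SIZE 16                    /* mirrors DEV_MAP_BULK_SIZE */

static void *queue[BULK_SIZE];
static unsigned int queue_count;
static unsigned int flushes;

static void flush_all(void)             /* stands in for bq_xmit_all() */
{
        if (!queue_count)
                return;
        flushes++;
        queue_count = 0;
}

static void enqueue(void *frame)        /* stands in for bq_enqueue() */
{
        if (queue_count == BULK_SIZE)   /* full: drain before storing more */
                flush_all();
        queue[queue_count++] = frame;
}

int main(void)
{
        int frames[40];

        for (int i = 0; i < 40; i++)
                enqueue(&frames[i]);
        flush_all();                    /* end-of-poll drain, like __dev_map_flush() */

        /* 40 frames with a bulk size of 16 -> 3 transmit calls (16+16+8). */
        printf("flush calls: %u\n", flushes);
        return 0;
}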
@@ -266,8 +319,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
         if (unlikely(!xdpf))
                 return -EOVERFLOW;
 
-        /* TODO: implement a bulking/enqueue step later */
-        return dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+        return bq_enqueue(dst, xdpf);
 }
 
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
@@ -282,13 +334,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
         if (dev->dev->netdev_ops->ndo_xdp_flush) {
                 struct net_device *fl = dev->dev;
+                struct xdp_bulk_queue *bq;
                 unsigned long *bitmap;
+
                 int cpu;
 
                 for_each_online_cpu(cpu) {
                         bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
                         __clear_bit(dev->bit, bitmap);
 
+                        bq = per_cpu_ptr(dev->bulkq, cpu);
+                        bq_xmit_all(dev, bq);
+
                         fl->netdev_ops->ndo_xdp_flush(dev->dev);
                 }
         }
@@ -300,6 +357,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 
         dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
         dev_map_flush_old(dev);
+        free_percpu(dev->bulkq);
         dev_put(dev->dev);
         kfree(dev);
 }
@@ -332,6 +390,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
         struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
         struct net *net = current->nsproxy->net_ns;
+        gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
         struct bpf_dtab_netdev *dev, *old_dev;
         u32 i = *(u32 *)key;
         u32 ifindex = *(u32 *)value;
@@ -346,13 +405,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
         if (!ifindex) {
                 dev = NULL;
         } else {
-                dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-                                   map->numa_node);
+                dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
                 if (!dev)
                         return -ENOMEM;
 
+                dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+                                                sizeof(void *), gfp);
+                if (!dev->bulkq) {
+                        kfree(dev);
+                        return -ENOMEM;
+                }
+
                 dev->dev = dev_get_by_index(net, ifindex);
                 if (!dev->dev) {
+                        free_percpu(dev->bulkq);
                         kfree(dev);
                         return -EINVAL;
                 }
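The update path now makes two allocations per new entry, the bpf_dtab_netdev itself and its per-CPU bulk queue, both with GFP_ATOMIC | __GFP_NOWARN through the new gfp local, so every later failure has to unwind in reverse order: free_percpu() before kfree(). A small user-space sketch of that pairing, with malloc/free standing in for the kernel allocators and all names invented for illustration:

/* Sketch of paired allocation with reverse-order unwinding; not kernel code. */
#include <errno.h>
#include <stdlib.h>

struct bulk_queue { void *q[16]; unsigned int count; };

struct entry {
        struct bulk_queue *bulkq;       /* second allocation */
        int ifindex;
};

/* Hypothetical stand-in for dev_get_by_index(): fails for ifindex 0. */
static int lookup_device(int ifindex)
{
        return ifindex ? 0 : -ENODEV;
}

static struct entry *entry_create(int ifindex)
{
        struct entry *e = malloc(sizeof(*e));    /* like kmalloc_node(..., gfp, ...) */

        if (!e)
                return NULL;

        e->bulkq = calloc(1, sizeof(*e->bulkq)); /* like __alloc_percpu_gfp() */
        if (!e->bulkq) {
                free(e);                         /* undo the first allocation */
                return NULL;
        }

        if (lookup_device(ifindex)) {            /* like the dev_get_by_index() check */
                free(e->bulkq);                  /* unwind in reverse order... */
                free(e);                         /* ...before dropping the entry */
                return NULL;
        }

        e->ifindex = ifindex;
        return e;
}

int main(void)
{
        struct entry *e = entry_create(3);

        if (e) {
                free(e->bulkq);
                free(e);
        }
        return 0;
}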