author     Daniel Borkmann <daniel@iogearbox.net>  2018-08-10 10:12:22 -0400
committer  Daniel Borkmann <daniel@iogearbox.net>  2018-08-10 10:12:23 -0400
commit     60afdf066a35317efd5d1d7ae7c7f4ef2b32601f (patch)
tree       e3ab1b8331d9f20766f6747eac42928f6b13d911
parent     c4c20217542469b9caf7f700ac9a2eeb32cb3742 (diff)
parent     638264dc90227cca00d20c26680171addce18e51 (diff)
Merge branch 'bpf-veth-xdp-support'
Toshiaki Makita says:

====================
This patch set introduces driver XDP for veth. Basically this is used
in conjunction with the redirect action of another XDP program.

    NIC -----------> veth===veth
   (XDP) (redirect)        (XDP)

In this case the xdp_frame can be forwarded to the peer veth without
modification, so we can expect far better performance than generic XDP.

Envisioned use-cases
--------------------

* Container managed XDP program
  The container host redirects frames to containers with the XDP
  redirect action, and privileged containers can deploy their own XDP
  programs.

* XDP program cascading
  Two or more XDP programs can be called for each packet by redirecting
  xdp frames to veth.

* Internal interface for an XDP bridge
  When using XDP redirection to create a virtual bridge, veth can be
  used to create an internal interface for the bridge.

Implementation
--------------

This changeset makes use of NAPI to implement ndo_xdp_xmit and
XDP_TX/REDIRECT, mainly because XDP heavily relies on NAPI context.

- patch 1:   Export a function needed for veth XDP.
- patch 2-3: Basic implementation of veth XDP.
- patch 4-6: Add ndo_xdp_xmit.
- patch 7-9: Add XDP_TX and XDP_REDIRECT.
- patch 10:  Performance optimization for multi-queue environments.

Tests and performance numbers
-----------------------------

Tested with a simple XDP program which only redirects packets between
the NIC and veth. The physical NIC is an i40e 25G NIC (XXV710); the
server has 20 Xeon Silver 2.20 GHz cores.

    pktgen --(wire)--> XXV710 (i40e) <--(XDP redirect)--> veth===veth (XDP)

The rightmost veth loads the XDP programs and just does DROP or TX. The
number of packets is measured in the XDP programs. The leftmost pktgen
sends packets at 37.1 Mpps (almost 25G wire speed).

veth XDP action    Flows    Mpps
================================
DROP                   1    10.6
DROP                   2    21.2
DROP                 100    36.0
TX                     1     5.0
TX                     2    10.0
TX                   100    31.0

I also measured netperf TCP_STREAM, but the performance was not as good
due to the lack of tx/rx checksum offload, TSO, etc.

    netperf <--(wire)--> XXV710 (i40e) <--(XDP redirect)--> veth===veth (XDP PASS)

Direction          Flows    Gbps
================================
external->veth         1    20.8
external->veth         2    23.5
external->veth       100    23.6
veth->external         1     9.0
veth->external         2    17.8
veth->external       100    22.9

Also tested doing ifup/down or loading/unloading an XDP program
repeatedly while processing XDP packets, in order to check that
enabling/disabling NAPI works as expected; no problems were found.

v8:
- Don't use the xdp_frame pointer address to calculate skb->head,
  headroom, and xdp_buff.data_hard_start.

v7:
- Introduce xdp_scrub_frame() to clear kernel pointers in xdp_frame and
  use it instead of memset().

v6:
- Check skb->len only if reallocation is needed.
- Add __GFP_NOWARN to alloc_page() since it can be triggered by
  external events.
- Fix a sparse warning around EXPORT_SYMBOL.

v5:
- Fix broken SOBs.

v4:
- Don't adjust the MTU automatically.
- Skip the peer IFF_UP check in .ndo_xdp_xmit() because it is
  unnecessary, and add comments to explain that.
- Use redirect_info instead of xdp_mem_info for storing the no_direct
  flag to avoid a per-packet copy cost.

v3:
- Drop the skb bulk xmit patch since it makes little performance
  difference. The hotspot in TCP skb xmit at this point is checksum
  computation in skb_segment and packet copy on XDP_REDIRECT due to
  cloned/nonlinear skb.
- Fix a race on closing the device.
- Add extack messages in ndo_bpf.

v2:
- Squash the NAPI patch with the "Add driver XDP" patch.
- Remove conversion from xdp_frame to skb when NAPI is not enabled.
- Introduce a per-queue XDP ring (patch 8).
- Introduce bulk skb xmit when XDP is enabled on the peer (patch 9).
====================

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
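The cover letter's topology (NIC --XDP_REDIRECT--> veth===veth) assumes an XDP
program on the physical NIC that redirects frames into one end of the veth
pair. The series itself does not ship such a program; the sketch below is a
minimal, hypothetical illustration of what it could look like. The VETH_IFINDEX
constant, the section name, and the use of bpf_redirect() rather than a devmap
plus bpf_redirect_map() are assumptions for illustration only.

/* Hypothetical NIC-side XDP program: redirect every frame into one end
 * of a veth pair.  VETH_IFINDEX is an assumed placeholder; a real
 * loader would resolve the interface index at load time (or use a
 * BPF_MAP_TYPE_DEVMAP with bpf_redirect_map()).
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define VETH_IFINDEX 5	/* assumed ifindex of the veth device */

SEC("xdp")
int xdp_redirect_to_veth(struct xdp_md *ctx)
{
	/* On success this returns XDP_REDIRECT; the frame is then handed
	 * to the veth peer via the ndo_xdp_xmit() path added by this
	 * series, without ever being converted to an skb.
	 */
	return bpf_redirect(VETH_IFINDEX, 0);
}

char _license[] SEC("license") = "GPL";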
-rw-r--r--  drivers/net/veth.c      | 750
-rw-r--r--  include/linux/filter.h  |  35
-rw-r--r--  include/linux/skbuff.h  |   1
-rw-r--r--  include/net/xdp.h       |   7
-rw-r--r--  net/core/filter.c       |  29
-rw-r--r--  net/core/skbuff.c       |   3
-rw-r--r--  net/core/xdp.c          |   6
7 files changed, 801 insertions(+), 30 deletions(-)
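For reference, the veth-side programs used in the benchmark table above only
count packets and return XDP_DROP or XDP_TX. A minimal sketch of the DROP
variant might look like the following; the per-CPU counter map, the BTF-style
map definition (which postdates this 2018 series), and the function name are
assumptions, not code from the patch set.

/* Hypothetical veth-side XDP program for the "DROP" benchmark rows:
 * count every received frame in a per-CPU counter and drop it.
 * Returning XDP_TX instead would exercise the XDP_TX path added in
 * patches 7-9.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} rx_cnt SEC(".maps");

SEC("xdp")
int xdp_count_drop(struct xdp_md *ctx)
{
	__u32 key = 0;
	__u64 *cnt = bpf_map_lookup_elem(&rx_cnt, &key);

	if (cnt)
		(*cnt)++;

	return XDP_DROP;	/* use XDP_TX to bounce frames back instead */
}

char _license[] SEC("license") = "GPL";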
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a69ad39ee57e..e3202af72df5 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -17,22 +17,47 @@
17#include <net/rtnetlink.h> 17#include <net/rtnetlink.h>
18#include <net/dst.h> 18#include <net/dst.h>
19#include <net/xfrm.h> 19#include <net/xfrm.h>
20#include <net/xdp.h>
20#include <linux/veth.h> 21#include <linux/veth.h>
21#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/bpf.h>
24#include <linux/filter.h>
25#include <linux/ptr_ring.h>
26#include <linux/bpf_trace.h>
22 27
23#define DRV_NAME "veth" 28#define DRV_NAME "veth"
24#define DRV_VERSION "1.0" 29#define DRV_VERSION "1.0"
25 30
31#define VETH_XDP_FLAG BIT(0)
32#define VETH_RING_SIZE 256
33#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
34
35/* Separating two types of XDP xmit */
36#define VETH_XDP_TX BIT(0)
37#define VETH_XDP_REDIR BIT(1)
38
26struct pcpu_vstats { 39struct pcpu_vstats {
27 u64 packets; 40 u64 packets;
28 u64 bytes; 41 u64 bytes;
29 struct u64_stats_sync syncp; 42 struct u64_stats_sync syncp;
30}; 43};
31 44
45struct veth_rq {
46 struct napi_struct xdp_napi;
47 struct net_device *dev;
48 struct bpf_prog __rcu *xdp_prog;
49 struct xdp_mem_info xdp_mem;
50 bool rx_notify_masked;
51 struct ptr_ring xdp_ring;
52 struct xdp_rxq_info xdp_rxq;
53};
54
32struct veth_priv { 55struct veth_priv {
33 struct net_device __rcu *peer; 56 struct net_device __rcu *peer;
34 atomic64_t dropped; 57 atomic64_t dropped;
35 unsigned requested_headroom; 58 struct bpf_prog *_xdp_prog;
59 struct veth_rq *rq;
60 unsigned int requested_headroom;
36}; 61};
37 62
38/* 63/*
@@ -98,11 +123,67 @@ static const struct ethtool_ops veth_ethtool_ops = {
98 .get_link_ksettings = veth_get_link_ksettings, 123 .get_link_ksettings = veth_get_link_ksettings,
99}; 124};
100 125
126/* general routines */
127
128static bool veth_is_xdp_frame(void *ptr)
129{
130 return (unsigned long)ptr & VETH_XDP_FLAG;
131}
132
133static void *veth_ptr_to_xdp(void *ptr)
134{
135 return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
136}
137
138static void *veth_xdp_to_ptr(void *ptr)
139{
140 return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
141}
142
143static void veth_ptr_free(void *ptr)
144{
145 if (veth_is_xdp_frame(ptr))
146 xdp_return_frame(veth_ptr_to_xdp(ptr));
147 else
148 kfree_skb(ptr);
149}
150
151static void __veth_xdp_flush(struct veth_rq *rq)
152{
153 /* Write ptr_ring before reading rx_notify_masked */
154 smp_mb();
155 if (!rq->rx_notify_masked) {
156 rq->rx_notify_masked = true;
157 napi_schedule(&rq->xdp_napi);
158 }
159}
160
161static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
162{
163 if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
164 dev_kfree_skb_any(skb);
165 return NET_RX_DROP;
166 }
167
168 return NET_RX_SUCCESS;
169}
170
171static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
172 struct veth_rq *rq, bool xdp)
173{
174 return __dev_forward_skb(dev, skb) ?: xdp ?
175 veth_xdp_rx(rq, skb) :
176 netif_rx(skb);
177}
178
101static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 179static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
102{ 180{
103 struct veth_priv *priv = netdev_priv(dev); 181 struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
182 struct veth_rq *rq = NULL;
104 struct net_device *rcv; 183 struct net_device *rcv;
105 int length = skb->len; 184 int length = skb->len;
185 bool rcv_xdp = false;
186 int rxq;
106 187
107 rcu_read_lock(); 188 rcu_read_lock();
108 rcv = rcu_dereference(priv->peer); 189 rcv = rcu_dereference(priv->peer);
@@ -111,7 +192,16 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
111 goto drop; 192 goto drop;
112 } 193 }
113 194
114 if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { 195 rcv_priv = netdev_priv(rcv);
196 rxq = skb_get_queue_mapping(skb);
197 if (rxq < rcv->real_num_rx_queues) {
198 rq = &rcv_priv->rq[rxq];
199 rcv_xdp = rcu_access_pointer(rq->xdp_prog);
200 if (rcv_xdp)
201 skb_record_rx_queue(skb, rxq);
202 }
203
204 if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
115 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 205 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
116 206
117 u64_stats_update_begin(&stats->syncp); 207 u64_stats_update_begin(&stats->syncp);
@@ -122,14 +212,15 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
122drop: 212drop:
123 atomic64_inc(&priv->dropped); 213 atomic64_inc(&priv->dropped);
124 } 214 }
215
216 if (rcv_xdp)
217 __veth_xdp_flush(rq);
218
125 rcu_read_unlock(); 219 rcu_read_unlock();
220
126 return NETDEV_TX_OK; 221 return NETDEV_TX_OK;
127} 222}
128 223
129/*
130 * general routines
131 */
132
133static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 224static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
134{ 225{
135 struct veth_priv *priv = netdev_priv(dev); 226 struct veth_priv *priv = netdev_priv(dev);
@@ -179,18 +270,502 @@ static void veth_set_multicast_list(struct net_device *dev)
179{ 270{
180} 271}
181 272
273static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
274 int buflen)
275{
276 struct sk_buff *skb;
277
278 if (!buflen) {
279 buflen = SKB_DATA_ALIGN(headroom + len) +
280 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
281 }
282 skb = build_skb(head, buflen);
283 if (!skb)
284 return NULL;
285
286 skb_reserve(skb, headroom);
287 skb_put(skb, len);
288
289 return skb;
290}
291
292static int veth_select_rxq(struct net_device *dev)
293{
294 return smp_processor_id() % dev->real_num_rx_queues;
295}
296
297static int veth_xdp_xmit(struct net_device *dev, int n,
298 struct xdp_frame **frames, u32 flags)
299{
300 struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
301 struct net_device *rcv;
302 unsigned int max_len;
303 struct veth_rq *rq;
304 int i, drops = 0;
305
306 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
307 return -EINVAL;
308
309 rcv = rcu_dereference(priv->peer);
310 if (unlikely(!rcv))
311 return -ENXIO;
312
313 rcv_priv = netdev_priv(rcv);
314 rq = &rcv_priv->rq[veth_select_rxq(rcv)];
315 /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
316 * side. This means an XDP program is loaded on the peer and the peer
317 * device is up.
318 */
319 if (!rcu_access_pointer(rq->xdp_prog))
320 return -ENXIO;
321
322 max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
323
324 spin_lock(&rq->xdp_ring.producer_lock);
325 for (i = 0; i < n; i++) {
326 struct xdp_frame *frame = frames[i];
327 void *ptr = veth_xdp_to_ptr(frame);
328
329 if (unlikely(frame->len > max_len ||
330 __ptr_ring_produce(&rq->xdp_ring, ptr))) {
331 xdp_return_frame_rx_napi(frame);
332 drops++;
333 }
334 }
335 spin_unlock(&rq->xdp_ring.producer_lock);
336
337 if (flags & XDP_XMIT_FLUSH)
338 __veth_xdp_flush(rq);
339
340 return n - drops;
341}
342
343static void veth_xdp_flush(struct net_device *dev)
344{
345 struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
346 struct net_device *rcv;
347 struct veth_rq *rq;
348
349 rcu_read_lock();
350 rcv = rcu_dereference(priv->peer);
351 if (unlikely(!rcv))
352 goto out;
353
354 rcv_priv = netdev_priv(rcv);
355 rq = &rcv_priv->rq[veth_select_rxq(rcv)];
356 /* xdp_ring is initialized on receive side? */
357 if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
358 goto out;
359
360 __veth_xdp_flush(rq);
361out:
362 rcu_read_unlock();
363}
364
365static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
366{
367 struct xdp_frame *frame = convert_to_xdp_frame(xdp);
368
369 if (unlikely(!frame))
370 return -EOVERFLOW;
371
372 return veth_xdp_xmit(dev, 1, &frame, 0);
373}
374
375static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
376 struct xdp_frame *frame,
377 unsigned int *xdp_xmit)
378{
379 void *hard_start = frame->data - frame->headroom;
380 void *head = hard_start - sizeof(struct xdp_frame);
381 int len = frame->len, delta = 0;
382 struct xdp_frame orig_frame;
383 struct bpf_prog *xdp_prog;
384 unsigned int headroom;
385 struct sk_buff *skb;
386
387 rcu_read_lock();
388 xdp_prog = rcu_dereference(rq->xdp_prog);
389 if (likely(xdp_prog)) {
390 struct xdp_buff xdp;
391 u32 act;
392
393 xdp.data_hard_start = hard_start;
394 xdp.data = frame->data;
395 xdp.data_end = frame->data + frame->len;
396 xdp.data_meta = frame->data - frame->metasize;
397 xdp.rxq = &rq->xdp_rxq;
398
399 act = bpf_prog_run_xdp(xdp_prog, &xdp);
400
401 switch (act) {
402 case XDP_PASS:
403 delta = frame->data - xdp.data;
404 len = xdp.data_end - xdp.data;
405 break;
406 case XDP_TX:
407 orig_frame = *frame;
408 xdp.data_hard_start = head;
409 xdp.rxq->mem = frame->mem;
410 if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
411 trace_xdp_exception(rq->dev, xdp_prog, act);
412 frame = &orig_frame;
413 goto err_xdp;
414 }
415 *xdp_xmit |= VETH_XDP_TX;
416 rcu_read_unlock();
417 goto xdp_xmit;
418 case XDP_REDIRECT:
419 orig_frame = *frame;
420 xdp.data_hard_start = head;
421 xdp.rxq->mem = frame->mem;
422 if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
423 frame = &orig_frame;
424 goto err_xdp;
425 }
426 *xdp_xmit |= VETH_XDP_REDIR;
427 rcu_read_unlock();
428 goto xdp_xmit;
429 default:
430 bpf_warn_invalid_xdp_action(act);
431 case XDP_ABORTED:
432 trace_xdp_exception(rq->dev, xdp_prog, act);
433 case XDP_DROP:
434 goto err_xdp;
435 }
436 }
437 rcu_read_unlock();
438
439 headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
440 skb = veth_build_skb(head, headroom, len, 0);
441 if (!skb) {
442 xdp_return_frame(frame);
443 goto err;
444 }
445
446 xdp_scrub_frame(frame);
447 skb->protocol = eth_type_trans(skb, rq->dev);
448err:
449 return skb;
450err_xdp:
451 rcu_read_unlock();
452 xdp_return_frame(frame);
453xdp_xmit:
454 return NULL;
455}
456
457static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
458 unsigned int *xdp_xmit)
459{
460 u32 pktlen, headroom, act, metalen;
461 void *orig_data, *orig_data_end;
462 struct bpf_prog *xdp_prog;
463 int mac_len, delta, off;
464 struct xdp_buff xdp;
465
466 rcu_read_lock();
467 xdp_prog = rcu_dereference(rq->xdp_prog);
468 if (unlikely(!xdp_prog)) {
469 rcu_read_unlock();
470 goto out;
471 }
472
473 mac_len = skb->data - skb_mac_header(skb);
474 pktlen = skb->len + mac_len;
475 headroom = skb_headroom(skb) - mac_len;
476
477 if (skb_shared(skb) || skb_head_is_locked(skb) ||
478 skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
479 struct sk_buff *nskb;
480 int size, head_off;
481 void *head, *start;
482 struct page *page;
483
484 size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
485 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
486 if (size > PAGE_SIZE)
487 goto drop;
488
489 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
490 if (!page)
491 goto drop;
492
493 head = page_address(page);
494 start = head + VETH_XDP_HEADROOM;
495 if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
496 page_frag_free(head);
497 goto drop;
498 }
499
500 nskb = veth_build_skb(head,
501 VETH_XDP_HEADROOM + mac_len, skb->len,
502 PAGE_SIZE);
503 if (!nskb) {
504 page_frag_free(head);
505 goto drop;
506 }
507
508 skb_copy_header(nskb, skb);
509 head_off = skb_headroom(nskb) - skb_headroom(skb);
510 skb_headers_offset_update(nskb, head_off);
511 if (skb->sk)
512 skb_set_owner_w(nskb, skb->sk);
513 consume_skb(skb);
514 skb = nskb;
515 }
516
517 xdp.data_hard_start = skb->head;
518 xdp.data = skb_mac_header(skb);
519 xdp.data_end = xdp.data + pktlen;
520 xdp.data_meta = xdp.data;
521 xdp.rxq = &rq->xdp_rxq;
522 orig_data = xdp.data;
523 orig_data_end = xdp.data_end;
524
525 act = bpf_prog_run_xdp(xdp_prog, &xdp);
526
527 switch (act) {
528 case XDP_PASS:
529 break;
530 case XDP_TX:
531 get_page(virt_to_page(xdp.data));
532 consume_skb(skb);
533 xdp.rxq->mem = rq->xdp_mem;
534 if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
535 trace_xdp_exception(rq->dev, xdp_prog, act);
536 goto err_xdp;
537 }
538 *xdp_xmit |= VETH_XDP_TX;
539 rcu_read_unlock();
540 goto xdp_xmit;
541 case XDP_REDIRECT:
542 get_page(virt_to_page(xdp.data));
543 consume_skb(skb);
544 xdp.rxq->mem = rq->xdp_mem;
545 if (xdp_do_redirect(rq->dev, &xdp, xdp_prog))
546 goto err_xdp;
547 *xdp_xmit |= VETH_XDP_REDIR;
548 rcu_read_unlock();
549 goto xdp_xmit;
550 default:
551 bpf_warn_invalid_xdp_action(act);
552 case XDP_ABORTED:
553 trace_xdp_exception(rq->dev, xdp_prog, act);
554 case XDP_DROP:
555 goto drop;
556 }
557 rcu_read_unlock();
558
559 delta = orig_data - xdp.data;
560 off = mac_len + delta;
561 if (off > 0)
562 __skb_push(skb, off);
563 else if (off < 0)
564 __skb_pull(skb, -off);
565 skb->mac_header -= delta;
566 off = xdp.data_end - orig_data_end;
567 if (off != 0)
568 __skb_put(skb, off);
569 skb->protocol = eth_type_trans(skb, rq->dev);
570
571 metalen = xdp.data - xdp.data_meta;
572 if (metalen)
573 skb_metadata_set(skb, metalen);
574out:
575 return skb;
576drop:
577 rcu_read_unlock();
578 kfree_skb(skb);
579 return NULL;
580err_xdp:
581 rcu_read_unlock();
582 page_frag_free(xdp.data);
583xdp_xmit:
584 return NULL;
585}
586
587static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
588{
589 int i, done = 0;
590
591 for (i = 0; i < budget; i++) {
592 void *ptr = __ptr_ring_consume(&rq->xdp_ring);
593 struct sk_buff *skb;
594
595 if (!ptr)
596 break;
597
598 if (veth_is_xdp_frame(ptr)) {
599 skb = veth_xdp_rcv_one(rq, veth_ptr_to_xdp(ptr),
600 xdp_xmit);
601 } else {
602 skb = veth_xdp_rcv_skb(rq, ptr, xdp_xmit);
603 }
604
605 if (skb)
606 napi_gro_receive(&rq->xdp_napi, skb);
607
608 done++;
609 }
610
611 return done;
612}
613
614static int veth_poll(struct napi_struct *napi, int budget)
615{
616 struct veth_rq *rq =
617 container_of(napi, struct veth_rq, xdp_napi);
618 unsigned int xdp_xmit = 0;
619 int done;
620
621 xdp_set_return_frame_no_direct();
622 done = veth_xdp_rcv(rq, budget, &xdp_xmit);
623
624 if (done < budget && napi_complete_done(napi, done)) {
625 /* Write rx_notify_masked before reading ptr_ring */
626 smp_store_mb(rq->rx_notify_masked, false);
627 if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
628 rq->rx_notify_masked = true;
629 napi_schedule(&rq->xdp_napi);
630 }
631 }
632
633 if (xdp_xmit & VETH_XDP_TX)
634 veth_xdp_flush(rq->dev);
635 if (xdp_xmit & VETH_XDP_REDIR)
636 xdp_do_flush_map();
637 xdp_clear_return_frame_no_direct();
638
639 return done;
640}
641
642static int veth_napi_add(struct net_device *dev)
643{
644 struct veth_priv *priv = netdev_priv(dev);
645 int err, i;
646
647 for (i = 0; i < dev->real_num_rx_queues; i++) {
648 struct veth_rq *rq = &priv->rq[i];
649
650 err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
651 if (err)
652 goto err_xdp_ring;
653 }
654
655 for (i = 0; i < dev->real_num_rx_queues; i++) {
656 struct veth_rq *rq = &priv->rq[i];
657
658 netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
659 napi_enable(&rq->xdp_napi);
660 }
661
662 return 0;
663err_xdp_ring:
664 for (i--; i >= 0; i--)
665 ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
666
667 return err;
668}
669
670static void veth_napi_del(struct net_device *dev)
671{
672 struct veth_priv *priv = netdev_priv(dev);
673 int i;
674
675 for (i = 0; i < dev->real_num_rx_queues; i++) {
676 struct veth_rq *rq = &priv->rq[i];
677
678 napi_disable(&rq->xdp_napi);
679 napi_hash_del(&rq->xdp_napi);
680 }
681 synchronize_net();
682
683 for (i = 0; i < dev->real_num_rx_queues; i++) {
684 struct veth_rq *rq = &priv->rq[i];
685
686 netif_napi_del(&rq->xdp_napi);
687 rq->rx_notify_masked = false;
688 ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
689 }
690}
691
692static int veth_enable_xdp(struct net_device *dev)
693{
694 struct veth_priv *priv = netdev_priv(dev);
695 int err, i;
696
697 if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
698 for (i = 0; i < dev->real_num_rx_queues; i++) {
699 struct veth_rq *rq = &priv->rq[i];
700
701 err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i);
702 if (err < 0)
703 goto err_rxq_reg;
704
705 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
706 MEM_TYPE_PAGE_SHARED,
707 NULL);
708 if (err < 0)
709 goto err_reg_mem;
710
711 /* Save original mem info as it can be overwritten */
712 rq->xdp_mem = rq->xdp_rxq.mem;
713 }
714
715 err = veth_napi_add(dev);
716 if (err)
717 goto err_rxq_reg;
718 }
719
720 for (i = 0; i < dev->real_num_rx_queues; i++)
721 rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
722
723 return 0;
724err_reg_mem:
725 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
726err_rxq_reg:
727 for (i--; i >= 0; i--)
728 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
729
730 return err;
731}
732
733static void veth_disable_xdp(struct net_device *dev)
734{
735 struct veth_priv *priv = netdev_priv(dev);
736 int i;
737
738 for (i = 0; i < dev->real_num_rx_queues; i++)
739 rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
740 veth_napi_del(dev);
741 for (i = 0; i < dev->real_num_rx_queues; i++) {
742 struct veth_rq *rq = &priv->rq[i];
743
744 rq->xdp_rxq.mem = rq->xdp_mem;
745 xdp_rxq_info_unreg(&rq->xdp_rxq);
746 }
747}
748
182static int veth_open(struct net_device *dev) 749static int veth_open(struct net_device *dev)
183{ 750{
184 struct veth_priv *priv = netdev_priv(dev); 751 struct veth_priv *priv = netdev_priv(dev);
185 struct net_device *peer = rtnl_dereference(priv->peer); 752 struct net_device *peer = rtnl_dereference(priv->peer);
753 int err;
186 754
187 if (!peer) 755 if (!peer)
188 return -ENOTCONN; 756 return -ENOTCONN;
189 757
758 if (priv->_xdp_prog) {
759 err = veth_enable_xdp(dev);
760 if (err)
761 return err;
762 }
763
190 if (peer->flags & IFF_UP) { 764 if (peer->flags & IFF_UP) {
191 netif_carrier_on(dev); 765 netif_carrier_on(dev);
192 netif_carrier_on(peer); 766 netif_carrier_on(peer);
193 } 767 }
768
194 return 0; 769 return 0;
195} 770}
196 771
@@ -203,6 +778,9 @@ static int veth_close(struct net_device *dev)
203 if (peer) 778 if (peer)
204 netif_carrier_off(peer); 779 netif_carrier_off(peer);
205 780
781 if (priv->_xdp_prog)
782 veth_disable_xdp(dev);
783
206 return 0; 784 return 0;
207} 785}
208 786
@@ -228,7 +806,7 @@ static void veth_dev_free(struct net_device *dev)
228static void veth_poll_controller(struct net_device *dev) 806static void veth_poll_controller(struct net_device *dev)
229{ 807{
230 /* veth only receives frames when its peer sends one 808 /* veth only receives frames when its peer sends one
231 * Since it's a synchronous operation, we are guaranteed 809 * Since it has nothing to do with disabling irqs, we are guaranteed
232 * never to have pending data when we poll for it so 810 * never to have pending data when we poll for it so
233 * there is nothing to do here. 811 * there is nothing to do here.
234 * 812 *
@@ -253,6 +831,23 @@ static int veth_get_iflink(const struct net_device *dev)
253 return iflink; 831 return iflink;
254} 832}
255 833
834static netdev_features_t veth_fix_features(struct net_device *dev,
835 netdev_features_t features)
836{
837 struct veth_priv *priv = netdev_priv(dev);
838 struct net_device *peer;
839
840 peer = rtnl_dereference(priv->peer);
841 if (peer) {
842 struct veth_priv *peer_priv = netdev_priv(peer);
843
844 if (peer_priv->_xdp_prog)
845 features &= ~NETIF_F_GSO_SOFTWARE;
846 }
847
848 return features;
849}
850
256static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 851static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
257{ 852{
258 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 853 struct veth_priv *peer_priv, *priv = netdev_priv(dev);
@@ -276,6 +871,103 @@ out:
276 rcu_read_unlock(); 871 rcu_read_unlock();
277} 872}
278 873
874static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
875 struct netlink_ext_ack *extack)
876{
877 struct veth_priv *priv = netdev_priv(dev);
878 struct bpf_prog *old_prog;
879 struct net_device *peer;
880 unsigned int max_mtu;
881 int err;
882
883 old_prog = priv->_xdp_prog;
884 priv->_xdp_prog = prog;
885 peer = rtnl_dereference(priv->peer);
886
887 if (prog) {
888 if (!peer) {
889 NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
890 err = -ENOTCONN;
891 goto err;
892 }
893
894 max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
895 peer->hard_header_len -
896 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
897 if (peer->mtu > max_mtu) {
898 NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
899 err = -ERANGE;
900 goto err;
901 }
902
903 if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
904 NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues");
905 err = -ENOSPC;
906 goto err;
907 }
908
909 if (dev->flags & IFF_UP) {
910 err = veth_enable_xdp(dev);
911 if (err) {
912 NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
913 goto err;
914 }
915 }
916
917 if (!old_prog) {
918 peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
919 peer->max_mtu = max_mtu;
920 }
921 }
922
923 if (old_prog) {
924 if (!prog) {
925 if (dev->flags & IFF_UP)
926 veth_disable_xdp(dev);
927
928 if (peer) {
929 peer->hw_features |= NETIF_F_GSO_SOFTWARE;
930 peer->max_mtu = ETH_MAX_MTU;
931 }
932 }
933 bpf_prog_put(old_prog);
934 }
935
936 if ((!!old_prog ^ !!prog) && peer)
937 netdev_update_features(peer);
938
939 return 0;
940err:
941 priv->_xdp_prog = old_prog;
942
943 return err;
944}
945
946static u32 veth_xdp_query(struct net_device *dev)
947{
948 struct veth_priv *priv = netdev_priv(dev);
949 const struct bpf_prog *xdp_prog;
950
951 xdp_prog = priv->_xdp_prog;
952 if (xdp_prog)
953 return xdp_prog->aux->id;
954
955 return 0;
956}
957
958static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
959{
960 switch (xdp->command) {
961 case XDP_SETUP_PROG:
962 return veth_xdp_set(dev, xdp->prog, xdp->extack);
963 case XDP_QUERY_PROG:
964 xdp->prog_id = veth_xdp_query(dev);
965 return 0;
966 default:
967 return -EINVAL;
968 }
969}
970
279static const struct net_device_ops veth_netdev_ops = { 971static const struct net_device_ops veth_netdev_ops = {
280 .ndo_init = veth_dev_init, 972 .ndo_init = veth_dev_init,
281 .ndo_open = veth_open, 973 .ndo_open = veth_open,
@@ -288,8 +980,11 @@ static const struct net_device_ops veth_netdev_ops = {
288 .ndo_poll_controller = veth_poll_controller, 980 .ndo_poll_controller = veth_poll_controller,
289#endif 981#endif
290 .ndo_get_iflink = veth_get_iflink, 982 .ndo_get_iflink = veth_get_iflink,
983 .ndo_fix_features = veth_fix_features,
291 .ndo_features_check = passthru_features_check, 984 .ndo_features_check = passthru_features_check,
292 .ndo_set_rx_headroom = veth_set_rx_headroom, 985 .ndo_set_rx_headroom = veth_set_rx_headroom,
986 .ndo_bpf = veth_xdp,
987 .ndo_xdp_xmit = veth_xdp_xmit,
293}; 988};
294 989
295#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 990#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
@@ -345,13 +1040,31 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
345 return 0; 1040 return 0;
346} 1041}
347 1042
1043static int veth_alloc_queues(struct net_device *dev)
1044{
1045 struct veth_priv *priv = netdev_priv(dev);
1046
1047 priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
1048 if (!priv->rq)
1049 return -ENOMEM;
1050
1051 return 0;
1052}
1053
1054static void veth_free_queues(struct net_device *dev)
1055{
1056 struct veth_priv *priv = netdev_priv(dev);
1057
1058 kfree(priv->rq);
1059}
1060
348static struct rtnl_link_ops veth_link_ops; 1061static struct rtnl_link_ops veth_link_ops;
349 1062
350static int veth_newlink(struct net *src_net, struct net_device *dev, 1063static int veth_newlink(struct net *src_net, struct net_device *dev,
351 struct nlattr *tb[], struct nlattr *data[], 1064 struct nlattr *tb[], struct nlattr *data[],
352 struct netlink_ext_ack *extack) 1065 struct netlink_ext_ack *extack)
353{ 1066{
354 int err; 1067 int err, i;
355 struct net_device *peer; 1068 struct net_device *peer;
356 struct veth_priv *priv; 1069 struct veth_priv *priv;
357 char ifname[IFNAMSIZ]; 1070 char ifname[IFNAMSIZ];
@@ -404,6 +1117,12 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
404 return PTR_ERR(peer); 1117 return PTR_ERR(peer);
405 } 1118 }
406 1119
1120 err = veth_alloc_queues(peer);
1121 if (err) {
1122 put_net(net);
1123 goto err_peer_alloc_queues;
1124 }
1125
407 if (!ifmp || !tbp[IFLA_ADDRESS]) 1126 if (!ifmp || !tbp[IFLA_ADDRESS])
408 eth_hw_addr_random(peer); 1127 eth_hw_addr_random(peer);
409 1128
@@ -432,6 +1151,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
432 * should be re-allocated 1151 * should be re-allocated
433 */ 1152 */
434 1153
1154 err = veth_alloc_queues(dev);
1155 if (err)
1156 goto err_alloc_queues;
1157
435 if (tb[IFLA_ADDRESS] == NULL) 1158 if (tb[IFLA_ADDRESS] == NULL)
436 eth_hw_addr_random(dev); 1159 eth_hw_addr_random(dev);
437 1160
@@ -451,19 +1174,28 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
451 */ 1174 */
452 1175
453 priv = netdev_priv(dev); 1176 priv = netdev_priv(dev);
1177 for (i = 0; i < dev->real_num_rx_queues; i++)
1178 priv->rq[i].dev = dev;
454 rcu_assign_pointer(priv->peer, peer); 1179 rcu_assign_pointer(priv->peer, peer);
455 1180
456 priv = netdev_priv(peer); 1181 priv = netdev_priv(peer);
1182 for (i = 0; i < peer->real_num_rx_queues; i++)
1183 priv->rq[i].dev = peer;
457 rcu_assign_pointer(priv->peer, dev); 1184 rcu_assign_pointer(priv->peer, dev);
1185
458 return 0; 1186 return 0;
459 1187
460err_register_dev: 1188err_register_dev:
1189 veth_free_queues(dev);
1190err_alloc_queues:
461 /* nothing to do */ 1191 /* nothing to do */
462err_configure_peer: 1192err_configure_peer:
463 unregister_netdevice(peer); 1193 unregister_netdevice(peer);
464 return err; 1194 return err;
465 1195
466err_register_peer: 1196err_register_peer:
1197 veth_free_queues(peer);
1198err_peer_alloc_queues:
467 free_netdev(peer); 1199 free_netdev(peer);
468 return err; 1200 return err;
469} 1201}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c73dd7396886..2b072dab32c0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -537,6 +537,20 @@ struct sk_msg_buff {
537 struct list_head list; 537 struct list_head list;
538}; 538};
539 539
540struct bpf_redirect_info {
541 u32 ifindex;
542 u32 flags;
543 struct bpf_map *map;
544 struct bpf_map *map_to_flush;
545 unsigned long map_owner;
546 u32 kern_flags;
547};
548
549DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
550
551/* flags for bpf_redirect_info kern_flags */
552#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */
553
540/* Compute the linear packet data range [data, data_end) which 554/* Compute the linear packet data range [data, data_end) which
541 * will be accessed by various program types (cls_bpf, act_bpf, 555 * will be accessed by various program types (cls_bpf, act_bpf,
542 * lwt, ...). Subsystems allowing direct data access must (!) 556 * lwt, ...). Subsystems allowing direct data access must (!)
@@ -765,6 +779,27 @@ static inline bool bpf_dump_raw_ok(void)
765struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, 779struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
766 const struct bpf_insn *patch, u32 len); 780 const struct bpf_insn *patch, u32 len);
767 781
782static inline bool xdp_return_frame_no_direct(void)
783{
784 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
785
786 return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
787}
788
789static inline void xdp_set_return_frame_no_direct(void)
790{
791 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
792
793 ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
794}
795
796static inline void xdp_clear_return_frame_no_direct(void)
797{
798 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
799
800 ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
801}
802
768static inline int xdp_ok_fwd_dev(const struct net_device *fwd, 803static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
769 unsigned int pktlen) 804 unsigned int pktlen)
770{ 805{
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ebdf158a795..e93b157f526c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1038,6 +1038,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
1038} 1038}
1039 1039
1040struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); 1040struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
1041void skb_headers_offset_update(struct sk_buff *skb, int off);
1041int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); 1042int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
1042struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); 1043struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
1043void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); 1044void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index fcb033f51d8c..76b95256c266 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -84,6 +84,13 @@ struct xdp_frame {
84 struct net_device *dev_rx; /* used by cpumap */ 84 struct net_device *dev_rx; /* used by cpumap */
85}; 85};
86 86
87/* Clear kernel pointers in xdp_frame */
88static inline void xdp_scrub_frame(struct xdp_frame *frame)
89{
90 frame->data = NULL;
91 frame->dev_rx = NULL;
92}
93
87/* Convert xdp_buff to xdp_frame */ 94/* Convert xdp_buff to xdp_frame */
88static inline 95static inline
89struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) 96struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
diff --git a/net/core/filter.c b/net/core/filter.c
index 587bbfbd7db3..2de7dd9f2a57 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2082,19 +2082,12 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
2082 .arg3_type = ARG_ANYTHING, 2082 .arg3_type = ARG_ANYTHING,
2083}; 2083};
2084 2084
2085struct redirect_info { 2085DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2086 u32 ifindex; 2086EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
2087 u32 flags;
2088 struct bpf_map *map;
2089 struct bpf_map *map_to_flush;
2090 unsigned long map_owner;
2091};
2092
2093static DEFINE_PER_CPU(struct redirect_info, redirect_info);
2094 2087
2095BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) 2088BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2096{ 2089{
2097 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2090 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2098 2091
2099 if (unlikely(flags & ~(BPF_F_INGRESS))) 2092 if (unlikely(flags & ~(BPF_F_INGRESS)))
2100 return TC_ACT_SHOT; 2093 return TC_ACT_SHOT;
@@ -2107,7 +2100,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2107 2100
2108int skb_do_redirect(struct sk_buff *skb) 2101int skb_do_redirect(struct sk_buff *skb)
2109{ 2102{
2110 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2103 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2111 struct net_device *dev; 2104 struct net_device *dev;
2112 2105
2113 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); 2106 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
@@ -3200,7 +3193,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
3200 3193
3201void xdp_do_flush_map(void) 3194void xdp_do_flush_map(void)
3202{ 3195{
3203 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3196 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3204 struct bpf_map *map = ri->map_to_flush; 3197 struct bpf_map *map = ri->map_to_flush;
3205 3198
3206 ri->map_to_flush = NULL; 3199 ri->map_to_flush = NULL;
@@ -3245,7 +3238,7 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
3245static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, 3238static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
3246 struct bpf_prog *xdp_prog) 3239 struct bpf_prog *xdp_prog)
3247{ 3240{
3248 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3241 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3249 unsigned long map_owner = ri->map_owner; 3242 unsigned long map_owner = ri->map_owner;
3250 struct bpf_map *map = ri->map; 3243 struct bpf_map *map = ri->map;
3251 u32 index = ri->ifindex; 3244 u32 index = ri->ifindex;
@@ -3285,7 +3278,7 @@ err:
3285int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, 3278int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3286 struct bpf_prog *xdp_prog) 3279 struct bpf_prog *xdp_prog)
3287{ 3280{
3288 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3281 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3289 struct net_device *fwd; 3282 struct net_device *fwd;
3290 u32 index = ri->ifindex; 3283 u32 index = ri->ifindex;
3291 int err; 3284 int err;
@@ -3317,7 +3310,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
3317 struct xdp_buff *xdp, 3310 struct xdp_buff *xdp,
3318 struct bpf_prog *xdp_prog) 3311 struct bpf_prog *xdp_prog)
3319{ 3312{
3320 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3313 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3321 unsigned long map_owner = ri->map_owner; 3314 unsigned long map_owner = ri->map_owner;
3322 struct bpf_map *map = ri->map; 3315 struct bpf_map *map = ri->map;
3323 u32 index = ri->ifindex; 3316 u32 index = ri->ifindex;
@@ -3368,7 +3361,7 @@ err:
3368int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, 3361int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
3369 struct xdp_buff *xdp, struct bpf_prog *xdp_prog) 3362 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
3370{ 3363{
3371 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3364 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3372 u32 index = ri->ifindex; 3365 u32 index = ri->ifindex;
3373 struct net_device *fwd; 3366 struct net_device *fwd;
3374 int err = 0; 3367 int err = 0;
@@ -3399,7 +3392,7 @@ EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
3399 3392
3400BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) 3393BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
3401{ 3394{
3402 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3395 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3403 3396
3404 if (unlikely(flags)) 3397 if (unlikely(flags))
3405 return XDP_ABORTED; 3398 return XDP_ABORTED;
@@ -3423,7 +3416,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
3423BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, 3416BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
3424 unsigned long, map_owner) 3417 unsigned long, map_owner)
3425{ 3418{
3426 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 3419 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3427 3420
3428 if (unlikely(flags)) 3421 if (unlikely(flags))
3429 return XDP_ABORTED; 3422 return XDP_ABORTED;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8d574a88125d..c996c09d095f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1291,7 +1291,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
1291} 1291}
1292EXPORT_SYMBOL(skb_clone); 1292EXPORT_SYMBOL(skb_clone);
1293 1293
1294static void skb_headers_offset_update(struct sk_buff *skb, int off) 1294void skb_headers_offset_update(struct sk_buff *skb, int off)
1295{ 1295{
1296 /* Only adjust this if it actually is csum_start rather than csum */ 1296 /* Only adjust this if it actually is csum_start rather than csum */
1297 if (skb->ip_summed == CHECKSUM_PARTIAL) 1297 if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -1305,6 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
1305 skb->inner_network_header += off; 1305 skb->inner_network_header += off;
1306 skb->inner_mac_header += off; 1306 skb->inner_mac_header += off;
1307} 1307}
1308EXPORT_SYMBOL(skb_headers_offset_update);
1308 1309
1309void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) 1310void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
1310{ 1311{
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 57285383ed00..3dd99e1c04f5 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -330,10 +330,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
330 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ 330 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
331 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 331 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
332 page = virt_to_head_page(data); 332 page = virt_to_head_page(data);
333 if (xa) 333 if (xa) {
334 napi_direct &= !xdp_return_frame_no_direct();
334 page_pool_put_page(xa->page_pool, page, napi_direct); 335 page_pool_put_page(xa->page_pool, page, napi_direct);
335 else 336 } else {
336 put_page(page); 337 put_page(page);
338 }
337 rcu_read_unlock(); 339 rcu_read_unlock();
338 break; 340 break;
339 case MEM_TYPE_PAGE_SHARED: 341 case MEM_TYPE_PAGE_SHARED: