From 73b62bd085f4737679ea9afc7867fa5f99ba7d1b Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:24 +0800
Subject: virtio-net: remove the warning before XDP linearizing

Since we use EWMA to estimate the size of the rx buffer, it is common
for a packet to span more than one buffer when the buffer size is
underestimated. As this is not a bug, remove the warning and correct
the comment before XDP linearizing.

Cc: John Fastabend
Signed-off-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 08327e005ccc..1067253b98be 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -552,14 +552,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		struct page *xdp_page;
 		u32 act;
 
-		/* No known backend devices should send packets with
-		 * more than a single buffer when XDP conditions are
-		 * met. However it is not strictly illegal so the case
-		 * is handled as an exception and a warning is thrown.
-		 */
+		/* This happens when rx buffer size is underestimated */
 		if (unlikely(num_buf > 1)) {
-			bpf_warn_invalid_xdp_buffer();
-
 			/* linearize data for XDP */
 			xdp_page = xdp_linearize_page(rq, num_buf,
 						      page, offset, &len);
--
cgit v1.2.2

From 275be061b33b945cfb120ee8a570f78c3ccafe56 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:25 +0800
Subject: virtio-net: correctly xmit linearized page on XDP_TX

After we linearize the page, we should xmit that page instead of the
page of the first buffer, which otherwise leads to unexpected results.
With this patch, correct packets are seen during XDP_TX.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1067253b98be..fe4562d395e3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -572,7 +572,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
 
-		act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
+		act = do_xdp_prog(vi, rq, xdp_prog, xdp_page, offset, len);
 		switch (act) {
 		case XDP_PASS:
 			if (unlikely(xdp_page != page))
--
cgit v1.2.2
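[ Editor's note: a minimal sketch of what the XDP entry of receive_mergeable() looks like once the two fixes above are applied. It is condensed from the diffs in this series rather than copied from any one tree; the point it illustrates is that after linearizing, both the program run and any XDP_TX transmit must use xdp_page, not the first buffer's page. ]

	struct page *xdp_page;
	u32 act;

	/* This happens when rx buffer size is underestimated */
	if (unlikely(num_buf > 1)) {
		/* copy every buffer of the packet into one fresh page
		 * so the XDP program sees a single contiguous frame
		 */
		xdp_page = xdp_linearize_page(rq, num_buf,
					      page, offset, &len);
		if (!xdp_page)
			goto err_xdp;
	} else {
		xdp_page = page;
	}

	/* run the program against the (possibly linearized) page */
	act = do_xdp_prog(vi, rq, xdp_prog, xdp_page, offset, len);
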
From 56a86f84b8332afe8c6fcb4b09d09d9bf094e2db Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:26 +0800
Subject: virtio-net: fix page miscount during XDP linearizing

We don't put the page during linearizing, which causes a leak when the
packet is transmitted through XDP_TX or exceeds PAGE_SIZE. Fix this by
putting the pages accordingly. Also decrease the number of buffers
during linearizing to make sure the caller can free the remaining
buffers correctly when the packet exceeds PAGE_SIZE. With this patch,
we no longer hit an OOM after linearizing a huge number of packets.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fe4562d395e3..58ad40e17a74 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -483,7 +483,7 @@ xdp_xmit:
  * anymore.
  */
 static struct page *xdp_linearize_page(struct receive_queue *rq,
-				       u16 num_buf,
+				       u16 *num_buf,
 				       struct page *p,
 				       int offset,
 				       unsigned int *len)
@@ -497,7 +497,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
 	page_off += *len;
 
-	while (--num_buf) {
+	while (--*num_buf) {
 		unsigned int buflen;
 		unsigned long ctx;
 		void *buf;
@@ -507,19 +507,22 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		if (unlikely(!ctx))
 			goto err_buf;
 
+		buf = mergeable_ctx_to_buf_address(ctx);
+		p = virt_to_head_page(buf);
+		off = buf - page_address(p);
+
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
-		if ((page_off + buflen) > PAGE_SIZE)
+		if ((page_off + buflen) > PAGE_SIZE) {
+			put_page(p);
 			goto err_buf;
-
-		buf = mergeable_ctx_to_buf_address(ctx);
-		p = virt_to_head_page(buf);
-		off = buf - page_address(p);
+		}
 
 		memcpy(page_address(page) + page_off,
 		       page_address(p) + off, buflen);
 		page_off += buflen;
+		put_page(p);
 	}
 
 	*len = page_off;
@@ -555,7 +558,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		/* This happens when rx buffer size is underestimated */
 		if (unlikely(num_buf > 1)) {
 			/* linearize data for XDP */
-			xdp_page = xdp_linearize_page(rq, num_buf,
+			xdp_page = xdp_linearize_page(rq, &num_buf,
 						      page, offset, &len);
 			if (!xdp_page)
 				goto err_xdp;
--
cgit v1.2.2

From 1830f8935f3b173d229b86e9927b3b6d599aa1f6 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:27 +0800
Subject: virtio-net: correctly handle XDP_PASS for linearized packets

When XDP_PASS is determined for a linearized packet, we try to get new
buffers from the virtqueue and build skbs from them. This is wrong; we
should create the skb based on the existing buffer instead. Fix this by
creating the skb based on xdp_page. With this patch,
"ping 192.168.100.4 -s 3900 -M do" works for XDP_PASS.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 58ad40e17a74..470293e2b84d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -578,8 +578,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		act = do_xdp_prog(vi, rq, xdp_prog, xdp_page, offset, len);
 		switch (act) {
 		case XDP_PASS:
-			if (unlikely(xdp_page != page))
-				__free_pages(xdp_page, 0);
+			/* We can only create skb based on xdp_page. */
+			if (unlikely(xdp_page != page)) {
+				rcu_read_unlock();
+				put_page(page);
+				head_skb = page_to_skb(vi, rq, xdp_page,
+						       0, len, PAGE_SIZE);
+				return head_skb;
+			}
 			break;
 		case XDP_TX:
 			if (unlikely(xdp_page != page))
--
cgit v1.2.2
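[ Editor's note: a condensed view of the XDP_PASS branch after the two fixes above, based on the diffs rather than quoted verbatim. The rule it illustrates: when the packet was linearized (xdp_page != page), the skb must be built from xdp_page, and the reference on the original first-buffer page is dropped because its data has already been copied into xdp_page. ]

	case XDP_PASS:
		/* We can only create skb based on xdp_page. */
		if (unlikely(xdp_page != page)) {
			rcu_read_unlock();
			put_page(page);	/* data now lives in xdp_page */
			head_skb = page_to_skb(vi, rq, xdp_page,
					       0, len, PAGE_SIZE);
			return head_skb;
		}
		break;
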
From b00f70b0dacb3d2e009afce860ebc90219072f5c Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:28 +0800
Subject: virtio-net: unbreak csumed packets for XDP_PASS

We drop csumed packets when doing XDP, which breaks XDP_PASS when
GUEST_CSUM is supported. Fix this by allowing the csum flag to be set.
With this patch, simple TCP works for XDP_PASS.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 470293e2b84d..0778dc88597e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -440,7 +440,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 		u32 act;
 
-		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 		act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
 		switch (act) {
@@ -572,7 +572,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		 * the receive path after XDP is loaded. In practice I
 		 * was not able to create this condition.
 		 */
-		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 
 		act = do_xdp_prog(vi, rq, xdp_prog, xdp_page, offset, len);
--
cgit v1.2.2

From 5c33474d41af09f09c98f1df70f267920587bec0 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:29 +0800
Subject: virtio-net: make rx buf size estimation work for XDP

We don't update the EWMA rx buffer size estimate in the XDP case. This
leads to underestimation of the rx buffer size, which causes the host
to produce packets spanning more than one buffer and greatly increases
the possibility of XDP page linearization.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0778dc88597e..77ae358ec630 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -584,10 +584,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page,
 						       0, len, PAGE_SIZE);
+				ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
 				return head_skb;
 			}
 			break;
 		case XDP_TX:
+			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
 			if (unlikely(xdp_page != page))
 				goto err_xdp;
 			rcu_read_unlock();
@@ -596,6 +598,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		default:
 			if (unlikely(xdp_page != page))
 				__free_pages(xdp_page, 0);
+			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
 			goto err_xdp;
 		}
 	}
--
cgit v1.2.2
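[ Editor's note: for background, the mergeable receive path sizes the buffers it posts from an EWMA of recently seen packet lengths, roughly as sketched below. The helper and constant names (ewma_pkt_len_read, GOOD_PACKET_LEN) are recalled from the driver of that era and should be treated as approximate, not as part of these patches. The patch above matters because skipping the ewma_pkt_len_add() update on the XDP return paths lets the average decay, the posted buffers shrink, large packets start spanning several buffers, and the expensive linearization path gets taken far more often. ]

	/* sketch: refill length ~ vnet header + clamped average packet size */
	len = hdr_len + clamp_t(unsigned int,
				ewma_pkt_len_read(&rq->mrg_avg_pkt_len),
				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
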
From 92502fe86c7c9b3f8543f29641a3c71805e82757 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:30 +0800
Subject: virtio-net: forbid XDP when VIRTIO_NET_F_GUEST_UFO is supported

When VIRTIO_NET_F_GUEST_UFO is negotiated, the host may still send UFO
packets that exceed a single page, which cannot be handled correctly by
XDP. So this patch forbids setting XDP when GUEST_UFO is supported.
While at it, also forbid XDP for ECN (which can only come from GRO) to
prevent user misconfiguration.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 77ae358ec630..c1f66d8bfb7b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1684,7 +1684,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	int i, err;
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
-	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
 		netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
 		return -EOPNOTSUPP;
 	}
--
cgit v1.2.2

From c47a43d3004ad6ff2a94a670cb3274cd6338d41e Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:31 +0800
Subject: virtio-net: remove big packet XDP codes

Now that we no longer allow XDP for big packets, remove its code.

Cc: John Fastabend
Signed-off-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 44 +++-----------------------------------------
 1 file changed, 3 insertions(+), 41 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c1f66d8bfb7b..e53365a86ca3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -344,11 +344,7 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
 	/* Free up any pending old buffers before queueing new ones. */
 	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 		struct page *sent_page = virt_to_head_page(xdp_sent);
-
-		if (vi->mergeable_rx_bufs)
-			put_page(sent_page);
-		else
-			give_pages(rq, sent_page);
+		put_page(sent_page);
 	}
 
 	/* Zero header and leave csum up to XDP layers */
@@ -360,15 +356,8 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
 	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
 				   xdp->data, GFP_ATOMIC);
 	if (unlikely(err)) {
-		if (vi->mergeable_rx_bufs)
-			put_page(page);
-		else
-			give_pages(rq, page);
+		put_page(page);
 		return; // On error abort to avoid unnecessary kick
-	} else if (!vi->mergeable_rx_bufs) {
-		/* If not mergeable bufs must be big packets so cleanup pages */
-		give_pages(rq, (struct page *)page->private);
-		page->private = 0;
 	}
 
 	virtqueue_kick(sq->vq);
@@ -430,44 +419,17 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   void *buf,
 				   unsigned int len)
 {
-	struct bpf_prog *xdp_prog;
 	struct page *page = buf;
-	struct sk_buff *skb;
-
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (xdp_prog) {
-		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
-		u32 act;
-
-		if (unlikely(hdr->hdr.gso_type))
-			goto err_xdp;
-		act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
-		switch (act) {
-		case XDP_PASS:
-			break;
-		case XDP_TX:
-			rcu_read_unlock();
-			goto xdp_xmit;
-		case XDP_DROP:
-		default:
-			goto err_xdp;
-		}
-	}
-	rcu_read_unlock();
+	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 
-	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 	if (unlikely(!skb))
 		goto err;
 
 	return skb;
 
-err_xdp:
-	rcu_read_unlock();
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
-xdp_xmit:
 	return NULL;
 }
--
cgit v1.2.2
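[ Editor's note: a small consolidation of the feature test added two patches earlier, shown only to make the rule explicit; the helper name is invented for this sketch and does not exist in the driver. Any negotiated guest offload that can hand the driver a packet larger than a single page (TSO4/TSO6, ECN, UFO) breaks the one-page assumption XDP relies on, so XDP setup is refused until LRO is disabled. ]

	/* hypothetical wrapper around the check from the GUEST_UFO patch */
	static bool virtnet_guest_offloads_block_xdp(struct virtnet_info *vi)
	{
		return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		       virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		       virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		       virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO);
	}
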
From bb91accf27335c6dc460e202991ca140fa21e1b5 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Fri, 23 Dec 2016 22:37:32 +0800
Subject: virtio-net: XDP support for small buffers

Commit f600b6905015 ("virtio_net: Add XDP support") leaves the case of
small receive buffers untouched. This will confuse users who want to
set XDP but use small buffers. Rather than forbidding XDP in small
buffer mode, let's make it work. XDP can then only operate on skb->data
since virtio-net creates skbs during refill; this is suboptimal and
could be optimized in the future.

Cc: John Fastabend
Signed-off-by: Jason Wang
Acked-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 112 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 87 insertions(+), 25 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e53365a86ca3..5deeda61d6d3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -333,9 +333,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 static void virtnet_xdp_xmit(struct virtnet_info *vi,
 			     struct receive_queue *rq,
 			     struct send_queue *sq,
-			     struct xdp_buff *xdp)
+			     struct xdp_buff *xdp,
+			     void *data)
 {
-	struct page *page = virt_to_head_page(xdp->data);
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	unsigned int num_sg, len;
 	void *xdp_sent;
@@ -343,20 +343,45 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
 
 	/* Free up any pending old buffers before queueing new ones. */
 	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		struct page *sent_page = virt_to_head_page(xdp_sent);
-		put_page(sent_page);
+		if (vi->mergeable_rx_bufs) {
+			struct page *sent_page = virt_to_head_page(xdp_sent);
+
+			put_page(sent_page);
+		} else { /* small buffer */
+			struct sk_buff *skb = xdp_sent;
+
+			kfree_skb(skb);
+		}
 	}
 
-	/* Zero header and leave csum up to XDP layers */
-	hdr = xdp->data;
-	memset(hdr, 0, vi->hdr_len);
+	if (vi->mergeable_rx_bufs) {
+		/* Zero header and leave csum up to XDP layers */
+		hdr = xdp->data;
+		memset(hdr, 0, vi->hdr_len);
+
+		num_sg = 1;
+		sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	} else { /* small buffer */
+		struct sk_buff *skb = data;
 
-	num_sg = 1;
-	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+		/* Zero header and leave csum up to XDP layers */
+		hdr = skb_vnet_hdr(skb);
+		memset(hdr, 0, vi->hdr_len);
+
+		num_sg = 2;
+		sg_init_table(sq->sg, 2);
+		sg_set_buf(sq->sg, hdr, vi->hdr_len);
+		skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
+	}
 	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
-				   xdp->data, GFP_ATOMIC);
+				   data, GFP_ATOMIC);
 	if (unlikely(err)) {
-		put_page(page);
+		if (vi->mergeable_rx_bufs) {
+			struct page *page = virt_to_head_page(xdp->data);
+
+			put_page(page);
+		} else /* small buffer */
+			kfree_skb(data);
 		return; // On error abort to avoid unnecessary kick
 	}
@@ -366,23 +391,26 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
 static u32 do_xdp_prog(struct virtnet_info *vi,
 		       struct receive_queue *rq,
 		       struct bpf_prog *xdp_prog,
-		       struct page *page, int offset, int len)
+		       void *data, int len)
 {
 	int hdr_padded_len;
 	struct xdp_buff xdp;
+	void *buf;
 	unsigned int qp;
 	u32 act;
-	u8 *buf;
-
-	buf = page_address(page) + offset;
-	if (vi->mergeable_rx_bufs)
+	if (vi->mergeable_rx_bufs) {
 		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	else
-		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+		xdp.data = data + hdr_padded_len;
+		xdp.data_end = xdp.data + (len - vi->hdr_len);
+		buf = data;
+	} else { /* small buffers */
+		struct sk_buff *skb = data;
 
-	xdp.data = buf + hdr_padded_len;
-	xdp.data_end = xdp.data + (len - vi->hdr_len);
+		xdp.data = skb->data;
+		xdp.data_end = xdp.data + len;
+		buf = skb->data;
+	}
 
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 	switch (act) {
@@ -392,8 +420,8 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
 		qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
 					  smp_processor_id();
-		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
-		virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
+		xdp.data = buf;
+		virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, data);
 		return XDP_TX;
 	default:
 		bpf_warn_invalid_xdp_action(act);
@@ -403,14 +431,47 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
 	}
 }
 
-static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
+static struct sk_buff *receive_small(struct net_device *dev,
+				     struct virtnet_info *vi,
+				     struct receive_queue *rq,
+				     void *buf, unsigned int len)
 {
 	struct sk_buff * skb = buf;
+	struct bpf_prog *xdp_prog;
 
 	len -= vi->hdr_len;
 	skb_trim(skb, len);
 
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+		u32 act;
+
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+		act = do_xdp_prog(vi, rq, xdp_prog, skb, len);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
+			goto err_xdp;
+		}
+	}
+	rcu_read_unlock();
+
 	return skb;
+
+err_xdp:
+	rcu_read_unlock();
+	dev->stats.rx_dropped++;
+	kfree_skb(skb);
+xdp_xmit:
+	return NULL;
 }
 
 static struct sk_buff *receive_big(struct net_device *dev,
@@ -537,7 +598,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 
-		act = do_xdp_prog(vi, rq, xdp_prog, xdp_page, offset, len);
+		act = do_xdp_prog(vi, rq, xdp_prog,
+				  page_address(xdp_page) + offset, len);
 		switch (act) {
 		case XDP_PASS:
 			/* We can only create skb based on xdp_page. */
@@ -672,7 +734,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 	else if (vi->big_packets)
 		skb = receive_big(dev, vi, rq, buf, len);
 	else
-		skb = receive_small(vi, buf, len);
+		skb = receive_small(dev, vi, rq, buf, len);
 
 	if (unlikely(!skb))
 		return;
--
cgit v1.2.2
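[ Editor's note: the reason the small-buffer XDP_TX path needs two scatterlist entries, shown as a lightly trimmed excerpt of the branch added above. In mergeable mode the vnet header sits in the same page immediately in front of the packet, so one entry covering xdp->data is enough; in small-buffer mode the header obtained via skb_vnet_hdr() is kept apart from the packet data, so header and payload have to be described by separate entries. ]

	} else { /* small buffer */
		struct sk_buff *skb = data;

		/* Zero header and leave csum up to XDP layers */
		hdr = skb_vnet_hdr(skb);
		memset(hdr, 0, vi->hdr_len);

		num_sg = 2;
		sg_init_table(sq->sg, 2);
		sg_set_buf(sq->sg, hdr, vi->hdr_len);	    /* entry 0: vnet header */
		skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); /* entry 1: packet data */
	}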