path: root/drivers/net/virtio_net.c
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--  drivers/net/virtio_net.c  110
1 file changed, 76 insertions(+), 34 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 626c27352ae2..23374603e4d9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
         sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
 
         err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
-        if (unlikely(err)) {
-                struct page *page = virt_to_head_page(xdp->data);
-
-                put_page(page);
-                return false;
-        }
+        if (unlikely(err))
+                return false; /* Caller handle free/refcnt */
 
         return true;
 }
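
Note on the hunk above: __virtnet_xdp_xmit() no longer frees the page when virtqueue_add_outbuf() fails; the buffer stays owned by the caller, which drops or recycles it on its own error path (the XDP_TX hunks further down show the callers doing exactly that). A minimal userspace sketch of this contract, with hypothetical names (struct frame, try_xmit) standing in for the driver's types:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct frame { void *data; };

/* Stand-in for __virtnet_xdp_xmit(): report failure, but do NOT free. */
static bool try_xmit(struct frame *f)
{
        (void)f;
        return false;           /* pretend the TX ring is full */
}

int main(void)
{
        struct frame *f = malloc(sizeof(*f));

        if (!f)
                return 1;
        if (!try_xmit(f)) {
                /* the caller handles free/refcnt, as the new comment says */
                free(f);
                puts("xmit failed; caller freed the frame");
        }
        return 0;
}
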
@@ -456,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
 static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 {
         struct virtnet_info *vi = netdev_priv(dev);
-        bool sent = __virtnet_xdp_xmit(vi, xdp);
+        struct receive_queue *rq = vi->rq;
+        struct bpf_prog *xdp_prog;
+        bool sent;
+
+        /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+         * indicate XDP resources have been successfully allocated.
+         */
+        xdp_prog = rcu_dereference(rq->xdp_prog);
+        if (!xdp_prog)
+                return -ENXIO;
 
+        sent = __virtnet_xdp_xmit(vi, xdp);
         if (!sent)
                 return -ENOSPC;
         return 0;
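
Note: the new rcu_dereference() guard rejects ndo_xdp_xmit callers (typically XDP_REDIRECT from another device) with -ENXIO until an XDP program is attached, since only then have the extra XDP TX queues been allocated. A minimal sketch of the guard's behaviour, assuming a plain pointer stands in for rcu_dereference(rq->xdp_prog); fake_rq and fake_xdp_xmit are made-up names:

#include <errno.h>
#include <stdio.h>

struct fake_rq { void *xdp_prog; };

/* Sketch of the guard: no program attached means no XDP TX resources. */
static int fake_xdp_xmit(struct fake_rq *rq)
{
        if (!rq->xdp_prog)
                return -ENXIO;
        return 0;               /* the real transmit path would run here */
}

int main(void)
{
        struct fake_rq rq = { .xdp_prog = NULL };

        printf("xmit without prog -> %d (expect %d)\n",
               fake_xdp_xmit(&rq), -ENXIO);
        return 0;
}
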
@@ -498,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
         page_off += *len;
 
         while (--*num_buf) {
+                int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
                 unsigned int buflen;
                 void *buf;
                 int off;
@@ -512,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                 /* guard against a misconfigured or uncooperative backend that
                  * is sending packet larger than the MTU.
                  */
-                if ((page_off + buflen) > PAGE_SIZE) {
+                if ((page_off + buflen + tailroom) > PAGE_SIZE) {
                         put_page(p);
                         goto err_buf;
                 }
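
Note: xdp_linearize_page() now also reserves SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) of tailroom, so the guard rejects packets whose copied data would leave no room for the shared-info area at the end of the page. A standalone sketch of the new bound; the 4096-byte page, 64-byte cache line and 320-byte skb_shared_info are typical x86-64 values assumed for illustration:

#include <stdio.h>

#define CACHE_LINE        64UL        /* assumed SMP_CACHE_BYTES */
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define SKB_DATA_ALIGN(x) ALIGN_UP((x), CACHE_LINE)
#define PAGE_SZ           4096UL
#define SHINFO_SZ         320UL       /* assumed sizeof(struct skb_shared_info) */

int main(void)
{
        unsigned long tailroom = SKB_DATA_ALIGN(SHINFO_SZ);
        unsigned long page_off = 256 + 2048;    /* headroom + data already copied */
        unsigned long buflen = 1536;            /* next fragment to merge */

        /* mirrors: if ((page_off + buflen + tailroom) > PAGE_SIZE) -> drop */
        printf("used %lu + tailroom %lu of %lu -> %s\n",
               page_off + buflen, tailroom, PAGE_SZ,
               (page_off + buflen + tailroom > PAGE_SZ) ? "drop" : "fits");
        return 0;
}

With these numbers the packet fit under the old check (3840 <= 4096) but is dropped once tailroom is accounted for.
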
@@ -546,8 +553,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
         unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
                               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
         struct page *page = virt_to_head_page(buf);
-        unsigned int delta = 0, err;
+        unsigned int delta = 0;
         struct page *xdp_page;
+        bool sent;
+        int err;
+
         len -= vi->hdr_len;
 
         rcu_read_lock();
@@ -558,7 +568,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                 void *orig_data;
                 u32 act;
 
-                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+                if (unlikely(hdr->hdr.gso_type))
                         goto err_xdp;
 
                 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
@@ -596,16 +606,19 @@ static struct sk_buff *receive_small(struct net_device *dev,
                         delta = orig_data - xdp.data;
                         break;
                 case XDP_TX:
-                        if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+                        sent = __virtnet_xdp_xmit(vi, &xdp);
+                        if (unlikely(!sent)) {
                                 trace_xdp_exception(vi->dev, xdp_prog, act);
-                        else
-                                *xdp_xmit = true;
+                                goto err_xdp;
+                        }
+                        *xdp_xmit = true;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 case XDP_REDIRECT:
                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                        if (!err)
-                                *xdp_xmit = true;
+                        if (err)
+                                goto err_xdp;
+                        *xdp_xmit = true;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 default:
@@ -677,6 +690,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         struct bpf_prog *xdp_prog;
         unsigned int truesize;
         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+        bool sent;
         int err;
 
         head_skb = NULL;
@@ -689,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 void *data;
                 u32 act;
 
-                /* This happens when rx buffer size is underestimated */
+                /* This happens when rx buffer size is underestimated
+                 * or headroom is not enough because of the buffer
+                 * was refilled before XDP is set. This should only
+                 * happen for the first several packets, so we don't
+                 * care much about its performance.
+                 */
                 if (unlikely(num_buf > 1 ||
                              headroom < virtnet_get_headroom(vi))) {
                         /* linearize data for XDP */
@@ -724,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-                if (act != XDP_PASS)
-                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
                 switch (act) {
                 case XDP_PASS:
                         /* recalculate offset to account for any header
@@ -746,18 +762,28 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         }
                         break;
                 case XDP_TX:
-                        if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+                        sent = __virtnet_xdp_xmit(vi, &xdp);
+                        if (unlikely(!sent)) {
                                 trace_xdp_exception(vi->dev, xdp_prog, act);
-                        else
-                                *xdp_xmit = true;
+                                if (unlikely(xdp_page != page))
+                                        put_page(xdp_page);
+                                goto err_xdp;
+                        }
+                        *xdp_xmit = true;
                         if (unlikely(xdp_page != page))
                                 goto err_xdp;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 case XDP_REDIRECT:
                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                        if (!err)
-                                *xdp_xmit = true;
+                        if (err) {
+                                if (unlikely(xdp_page != page))
+                                        put_page(xdp_page);
+                                goto err_xdp;
+                        }
+                        *xdp_xmit = true;
+                        if (unlikely(xdp_page != page))
+                                goto err_xdp;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 default:
@@ -1003,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-                                          struct ewma_pkt_len *avg_pkt_len)
+                                          struct ewma_pkt_len *avg_pkt_len,
+                                          unsigned int room)
 {
         const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
         unsigned int len;
 
-        len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+        if (room)
+                return PAGE_SIZE - room;
+
+        len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
                                 rq->min_buf_len, PAGE_SIZE - hdr_len);
+
         return ALIGN(len, L1_CACHE_BYTES);
 }
 
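
Note: get_mergeable_buf_len() now takes a room argument; when it is non-zero (XDP attached) the EWMA-based estimate is bypassed and every buffer is sized to PAGE_SIZE - room. A standalone model of both branches; the constants (4096-byte page, 256-byte XDP headroom, 320-byte skb_shared_info, 64-byte cache line, 12-byte mergeable header) and the min_buf_len value are illustrative assumptions:

#include <stdio.h>

#define CACHE_LINE        64UL
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define SKB_DATA_ALIGN(x) ALIGN_UP((x), CACHE_LINE)
#define PAGE_SZ           4096UL
#define XDP_HEADROOM      256UL       /* assumed virtnet_get_headroom() with XDP */
#define SHINFO_SZ         320UL       /* assumed sizeof(struct skb_shared_info) */
#define HDR_LEN           12UL        /* sizeof(struct virtio_net_hdr_mrg_rxbuf) */

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

/* mirrors get_mergeable_buf_len(): room != 0 selects the XDP sizing */
static unsigned long buf_len(unsigned long avg_pkt, unsigned long min_buf_len,
                             unsigned long room)
{
        if (room)
                return PAGE_SZ - room;
        return ALIGN_UP(HDR_LEN + clamp_ul(avg_pkt, min_buf_len, PAGE_SZ - HDR_LEN),
                        CACHE_LINE);
}

int main(void)
{
        unsigned long room = SKB_DATA_ALIGN(XDP_HEADROOM + SHINFO_SZ);

        printf("no XDP, avg 1500 -> %lu bytes\n", buf_len(1500, 128, 0));
        printf("XDP on (room %lu) -> %lu bytes\n", room, buf_len(1500, 128, room));
        return 0;
}
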
@@ -1018,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
         struct page_frag *alloc_frag = &rq->alloc_frag;
         unsigned int headroom = virtnet_get_headroom(vi);
+        unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+        unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
         char *buf;
         void *ctx;
         int err;
         unsigned int len, hole;
 
-        len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-        if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+        /* Extra tailroom is needed to satisfy XDP's assumption. This
+         * means rx frags coalescing won't work, but consider we've
+         * disabled GSO for XDP, it won't be a big issue.
+         */
+        len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+        if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
                 return -ENOMEM;
 
         buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
         buf += headroom; /* advance address leaving hole at front of pkt */
         get_page(alloc_frag->page);
-        alloc_frag->offset += len + headroom;
+        alloc_frag->offset += len + room;
         hole = alloc_frag->size - alloc_frag->offset;
-        if (hole < len + headroom) {
+        if (hole < len + room) {
                 /* To avoid internal fragmentation, if there is very likely not
                  * enough space for another buffer, add the remaining space to
                  * the current buffer.
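
Note: the refill path now reserves len + room per buffer (headroom in front of the data, tailroom behind it) instead of len + headroom, and the leftover-hole check uses the same quantity, so a buffer is only extended when another full len + room slot no longer fits in the page frag. A small standalone model of that bookkeeping, reusing the placeholder constants above and assuming a 32 KiB page frag:

#include <stdio.h>

#define CACHE_LINE        64UL
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define SKB_DATA_ALIGN(x) ALIGN_UP((x), CACHE_LINE)
#define PAGE_SZ           4096UL
#define FRAG_SZ           (8 * PAGE_SZ)       /* assumed page_frag allocation size */
#define XDP_HEADROOM      256UL
#define SHINFO_SZ         320UL

int main(void)
{
        unsigned long room = SKB_DATA_ALIGN(XDP_HEADROOM + SHINFO_SZ);
        unsigned long len = PAGE_SZ - room;     /* get_mergeable_buf_len() result */
        unsigned long offset = 0, hole, bufs = 0;

        while (offset + len + room <= FRAG_SZ) {        /* refill stand-in */
                offset += len + room;                   /* alloc_frag->offset += len + room */
                bufs++;
                hole = FRAG_SZ - offset;
                if (hole < len + room) {                /* too small for another buffer: */
                        offset += hole;                 /* fold the hole into this one */
                        break;
                }
        }
        printf("%lu buffers of %lu+%lu bytes, frag fully used: %s\n",
               bufs, len, room, offset == FRAG_SZ ? "yes" : "no");
        return 0;
}
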
@@ -2175,8 +2212,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
         }
 
         /* Make sure NAPI is not using any XDP TX queues for RX. */
-        for (i = 0; i < vi->max_queue_pairs; i++)
-                napi_disable(&vi->rq[i].napi);
+        if (netif_running(dev))
+                for (i = 0; i < vi->max_queue_pairs; i++)
+                        napi_disable(&vi->rq[i].napi);
 
         netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
         err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
@@ -2195,7 +2233,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                 }
                 if (old_prog)
                         bpf_prog_put(old_prog);
-                virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+                if (netif_running(dev))
+                        virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
         }
 
         return 0;
@@ -2566,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
         struct virtnet_info *vi = netdev_priv(queue->dev);
         unsigned int queue_index = get_netdev_rx_queue_index(queue);
+        unsigned int headroom = virtnet_get_headroom(vi);
+        unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
         struct ewma_pkt_len *avg;
 
         BUG_ON(queue_index >= vi->max_queue_pairs);
         avg = &vi->rq[queue_index].mrg_avg_pkt_len;
         return sprintf(buf, "%u\n",
-                       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+                       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+                                       SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =