Diffstat (limited to 'drivers/net/virtio_net.c')
 -rw-r--r--   drivers/net/virtio_net.c | 110
 1 file changed, 76 insertions(+), 34 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 626c27352ae2..23374603e4d9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
         sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
 
         err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
-        if (unlikely(err)) {
-                struct page *page = virt_to_head_page(xdp->data);
-
-                put_page(page);
-                return false;
-        }
+        if (unlikely(err))
+                return false; /* Caller handle free/refcnt */
 
         return true;
 }
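The hunk above changes the ownership rule of __virtnet_xdp_xmit(): when virtqueue_add_outbuf() fails, the helper no longer drops the page reference itself, and each caller releases the frame on its own error path. As a rough illustration of that caller-frees contract (hypothetical names, plain userspace C, not the driver's API):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical low-level transmit helper: it reports failure but does NOT
 * free the buffer, mirroring the new __virtnet_xdp_xmit() contract. */
static bool try_xmit(void *frame)
{
        (void)frame;
        return false;            /* pretend the ring is full */
}

int main(void)
{
        void *frame = malloc(64);

        if (!frame)
                return 1;

        if (!try_xmit(frame)) {
                /* Caller owns the reference, so the caller releases it. */
                free(frame);
                printf("xmit failed, frame freed by caller\n");
        }
        return 0;
}

The XDP_TX hunks later in receive_small() and receive_mergeable() follow exactly this pattern: on failure they free what they own and jump to err_xdp.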
@@ -456,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
 static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 {
         struct virtnet_info *vi = netdev_priv(dev);
-        bool sent = __virtnet_xdp_xmit(vi, xdp);
+        struct receive_queue *rq = vi->rq;
+        struct bpf_prog *xdp_prog;
+        bool sent;
+
+        /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+         * indicate XDP resources have been successfully allocated.
+         */
+        xdp_prog = rcu_dereference(rq->xdp_prog);
+        if (!xdp_prog)
+                return -ENXIO;
 
+        sent = __virtnet_xdp_xmit(vi, xdp);
         if (!sent)
                 return -ENOSPC;
         return 0;
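The new check distinguishes two failure modes for callers such as the XDP redirect path: -ENXIO when the device has no XDP resources at all (no program attached), and -ENOSPC when resources exist but the ring is currently full. A minimal sketch of that contract, using hypothetical stand-in flags instead of real driver state:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for driver state, for illustration only. */
static bool xdp_prog_loaded;    /* rcu_dereference(rq->xdp_prog) != NULL */
static bool ring_has_space;

/* Mirrors the updated virtnet_xdp_xmit() error contract. */
static int xdp_xmit(void)
{
        if (!xdp_prog_loaded)
                return -ENXIO;          /* no XDP resources on this device */
        if (!ring_has_space)
                return -ENOSPC;         /* resources exist, ring is full */
        return 0;
}

int main(void)
{
        printf("no prog:   %s\n", strerror(-xdp_xmit()));
        xdp_prog_loaded = true;
        printf("ring full: %s\n", strerror(-xdp_xmit()));
        ring_has_space = true;
        printf("ok:        %s\n", strerror(-xdp_xmit()));
        return 0;
}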
@@ -498,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
         page_off += *len;
 
         while (--*num_buf) {
+                int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
                 unsigned int buflen;
                 void *buf;
                 int off;
@@ -512,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                 /* guard against a misconfigured or uncooperative backend that
                  * is sending packet larger than the MTU.
                  */
-                if ((page_off + buflen) > PAGE_SIZE) {
+                if ((page_off + buflen + tailroom) > PAGE_SIZE) {
                         put_page(p);
                         goto err_buf;
                 }
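The added tailroom reserves space for a struct skb_shared_info at the end of the linearization page, so an skb later built over that page does not overrun it. A rough userspace restatement of the bound, assuming a 4096-byte page, 64-byte SMP_CACHE_BYTES and roughly 320 bytes for struct skb_shared_info (illustrative x86-64-ish values, not taken from this patch):

#include <stdio.h>

#define PAGE_SIZE        4096u
#define SMP_CACHE_BYTES  64u
#define SKB_SHINFO_SIZE  320u   /* assumed sizeof(struct skb_shared_info) */

/* Same rounding as the kernel's SKB_DATA_ALIGN(): round up to a cache line. */
static unsigned int skb_data_align(unsigned int x)
{
        return (x + SMP_CACHE_BYTES - 1) & ~(SMP_CACHE_BYTES - 1);
}

int main(void)
{
        unsigned int tailroom = skb_data_align(SKB_SHINFO_SIZE);
        unsigned int page_off = 256;    /* e.g. headroom already consumed */
        unsigned int max_copy = PAGE_SIZE - page_off - tailroom;

        printf("tailroom reserved: %u bytes\n", tailroom);
        printf("payload that still fits: %u bytes\n", max_copy);
        /* A fragment is rejected when page_off + buflen + tailroom > PAGE_SIZE. */
        return 0;
}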
@@ -546,8 +553,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
         unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
                               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
         struct page *page = virt_to_head_page(buf);
-        unsigned int delta = 0, err;
+        unsigned int delta = 0;
         struct page *xdp_page;
+        bool sent;
+        int err;
+
         len -= vi->hdr_len;
 
         rcu_read_lock();
@@ -558,7 +568,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                 void *orig_data;
                 u32 act;
 
-                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+                if (unlikely(hdr->hdr.gso_type))
                         goto err_xdp;
 
                 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
@@ -596,16 +606,19 @@ static struct sk_buff *receive_small(struct net_device *dev,
                         delta = orig_data - xdp.data;
                         break;
                 case XDP_TX:
-                        if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+                        sent = __virtnet_xdp_xmit(vi, &xdp);
+                        if (unlikely(!sent)) {
                                 trace_xdp_exception(vi->dev, xdp_prog, act);
-                        else
-                                *xdp_xmit = true;
+                                goto err_xdp;
+                        }
+                        *xdp_xmit = true;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 case XDP_REDIRECT:
                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                        if (!err)
-                                *xdp_xmit = true;
+                        if (err)
+                                goto err_xdp;
+                        *xdp_xmit = true;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 default:
@@ -677,6 +690,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         struct bpf_prog *xdp_prog;
         unsigned int truesize;
         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+        bool sent;
         int err;
 
         head_skb = NULL;
@@ -689,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 void *data;
                 u32 act;
 
-                /* This happens when rx buffer size is underestimated */
+                /* This happens when rx buffer size is underestimated
+                 * or headroom is not enough because of the buffer
+                 * was refilled before XDP is set. This should only
+                 * happen for the first several packets, so we don't
+                 * care much about its performance.
+                 */
                 if (unlikely(num_buf > 1 ||
                              headroom < virtnet_get_headroom(vi))) {
                         /* linearize data for XDP */
@@ -724,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-                if (act != XDP_PASS)
-                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
                 switch (act) {
                 case XDP_PASS:
                         /* recalculate offset to account for any header
@@ -746,18 +762,28 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         }
                         break;
                 case XDP_TX:
-                        if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+                        sent = __virtnet_xdp_xmit(vi, &xdp);
+                        if (unlikely(!sent)) {
                                 trace_xdp_exception(vi->dev, xdp_prog, act);
-                        else
-                                *xdp_xmit = true;
+                                if (unlikely(xdp_page != page))
+                                        put_page(xdp_page);
+                                goto err_xdp;
+                        }
+                        *xdp_xmit = true;
                         if (unlikely(xdp_page != page))
                                 goto err_xdp;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 case XDP_REDIRECT:
                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                        if (!err)
-                                *xdp_xmit = true;
+                        if (err) {
+                                if (unlikely(xdp_page != page))
+                                        put_page(xdp_page);
+                                goto err_xdp;
+                        }
+                        *xdp_xmit = true;
+                        if (unlikely(xdp_page != page))
+                                goto err_xdp;
                         rcu_read_unlock();
                         goto xdp_xmit;
                 default:
@@ -1003,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-                                          struct ewma_pkt_len *avg_pkt_len)
+                                          struct ewma_pkt_len *avg_pkt_len,
+                                          unsigned int room)
 {
         const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
         unsigned int len;
 
-        len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+        if (room)
+                return PAGE_SIZE - room;
+
+        len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
                                 rq->min_buf_len, PAGE_SIZE - hdr_len);
+
         return ALIGN(len, L1_CACHE_BYTES);
 }
 
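With the new room argument, the average-packet-length heuristic is bypassed whenever headroom/tailroom must be reserved: the function simply returns PAGE_SIZE - room, so one receive buffer plus its reserved space fills a page. A hedged userspace restatement of the math, with the EWMA replaced by a plain number and the merge-rxbuf header size assumed to be 12 bytes (illustrative constants only):

#include <stdio.h>

#define PAGE_SIZE       4096u
#define L1_CACHE_BYTES  64u
#define HDR_LEN         12u     /* assumed sizeof(struct virtio_net_hdr_mrg_rxbuf) */

static unsigned int align_up(unsigned int x, unsigned int a)
{
        return (x + a - 1) & ~(a - 1);
}

static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

/* Mirrors the updated get_mergeable_buf_len(): a non-zero 'room' short-circuits
 * the average-packet-length heuristic; 'avg_pkt_len' stands in for the EWMA. */
static unsigned int mergeable_buf_len(unsigned int avg_pkt_len,
                                      unsigned int min_buf_len,
                                      unsigned int room)
{
        unsigned int len;

        if (room)
                return PAGE_SIZE - room;

        len = HDR_LEN + clamp_uint(avg_pkt_len, min_buf_len, PAGE_SIZE - HDR_LEN);
        return align_up(len, L1_CACHE_BYTES);
}

int main(void)
{
        /* 576 = SKB_DATA_ALIGN(256 + 320), an assumed XDP headroom+tailroom. */
        printf("no XDP:  %u\n", mergeable_buf_len(1500, 128, 0));
        printf("XDP on:  %u\n", mergeable_buf_len(1500, 128, 576));
        return 0;
}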
@@ -1018,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
         struct page_frag *alloc_frag = &rq->alloc_frag;
         unsigned int headroom = virtnet_get_headroom(vi);
+        unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+        unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
         char *buf;
         void *ctx;
         int err;
         unsigned int len, hole;
 
-        len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-        if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+        /* Extra tailroom is needed to satisfy XDP's assumption. This
+         * means rx frags coalescing won't work, but consider we've
+         * disabled GSO for XDP, it won't be a big issue.
+         */
+        len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+        if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
                 return -ENOMEM;
 
         buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
         buf += headroom; /* advance address leaving hole at front of pkt */
         get_page(alloc_frag->page);
-        alloc_frag->offset += len + headroom;
+        alloc_frag->offset += len + room;
         hole = alloc_frag->size - alloc_frag->offset;
-        if (hole < len + headroom) {
+        if (hole < len + room) {
                 /* To avoid internal fragmentation, if there is very likely not
                  * enough space for another buffer, add the remaining space to
                  * the current buffer.
@@ -2175,8 +2212,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
         }
 
         /* Make sure NAPI is not using any XDP TX queues for RX. */
-        for (i = 0; i < vi->max_queue_pairs; i++)
-                napi_disable(&vi->rq[i].napi);
+        if (netif_running(dev))
+                for (i = 0; i < vi->max_queue_pairs; i++)
+                        napi_disable(&vi->rq[i].napi);
 
         netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
         err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
@@ -2195,7 +2233,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                 }
                 if (old_prog)
                         bpf_prog_put(old_prog);
-                virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+                if (netif_running(dev))
+                        virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
         }
 
         return 0;
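Taken together, the two netif_running() checks make the NAPI handling symmetric: NAPI is disabled before the queue reconfiguration only if the interface is actually up, and re-enabled afterwards under the same condition, so installing XDP on a downed device no longer touches NAPI state that was never started. A trivial sketch of that guard pattern (hypothetical names, not driver code):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the device state, for illustration only. */
static bool dev_running;

static void reconfigure_queues(void)
{
        /* Pause packet processing only if it is actually active ... */
        if (dev_running)
                printf("  napi_disable on all rx queues\n");

        printf("  queues reconfigured, XDP program swapped\n");

        /* ... and resume under the same condition, keeping the paths symmetric. */
        if (dev_running)
                printf("  virtnet_napi_enable on all rx queues\n");
}

int main(void)
{
        printf("device down:\n");
        reconfigure_queues();   /* NAPI is never touched */

        dev_running = true;
        printf("device up:\n");
        reconfigure_queues();   /* NAPI paused and resumed around the swap */
        return 0;
}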
@@ -2566,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
         struct virtnet_info *vi = netdev_priv(queue->dev);
         unsigned int queue_index = get_netdev_rx_queue_index(queue);
+        unsigned int headroom = virtnet_get_headroom(vi);
+        unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
         struct ewma_pkt_len *avg;
 
         BUG_ON(queue_index >= vi->max_queue_pairs);
         avg = &vi->rq[queue_index].mrg_avg_pkt_len;
         return sprintf(buf, "%u\n",
-                       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+                       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+                                       SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =