Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--  drivers/net/virtio_net.c  62
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bb9e562b893..23374603e4d9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	page_off += *len;
 
 	while (--*num_buf) {
+		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		unsigned int buflen;
 		void *buf;
 		int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
+		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
 			put_page(p);
 			goto err_buf;
 		}
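
The two hunks above make the linearization path reserve tailroom: the copy loop no longer fills the page all the way to PAGE_SIZE, keeping SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) bytes free at the end. A minimal sketch of the bound being enforced; the helper name xdp_copy_fits() is illustrative and not part of the patch:

/* Illustrative helper, not from the patch: a fragment may only be
 * appended while the current offset plus its length plus the reserved
 * tailroom still fits in the page, so shared info written later at the
 * tail cannot be overwritten.
 */
static bool xdp_copy_fits(unsigned int page_off, unsigned int buflen)
{
	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	return page_off + buflen + tailroom <= PAGE_SIZE;
}
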
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	bool sent;
+	int err;
 
 	head_skb = NULL;
 
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		void *data;
 		u32 act;
 
-		/* This happens when rx buffer size is underestimated */
+		/* This happens when rx buffer size is underestimated
+		 * or headroom is not enough because of the buffer
+		 * was refilled before XDP is set. This should only
+		 * happen for the first several packets, so we don't
+		 * care much about its performance.
+		 */
 		if (unlikely(num_buf > 1 ||
 			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-		if (act != XDP_PASS)
-			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
 		switch (act) {
 		case XDP_PASS:
 			/* recalculate offset to account for any header
@@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
+		case XDP_REDIRECT:
+			err = xdp_do_redirect(dev, &xdp, xdp_prog);
+			if (err) {
+				if (unlikely(xdp_page != page))
+					put_page(xdp_page);
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
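
The new XDP_REDIRECT arm hands the frame to xdp_do_redirect() and records the event in *xdp_xmit so the poll loop can flush the redirect maps once per NAPI batch instead of per packet. The caller side is not shown in this section; a hedged sketch of the assumed pattern:

/* Caller-side sketch (assumed, not shown in this hunk): the poll loop
 * passes a bool down through virtnet_receive() and flushes redirect
 * maps once after the batch completes.
 */
bool xdp_xmit = false;

received = virtnet_receive(rq, budget, &xdp_xmit);
if (xdp_xmit)
	xdp_do_flush_map();
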
@@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-					  struct ewma_pkt_len *avg_pkt_len)
+					  struct ewma_pkt_len *avg_pkt_len,
+					  unsigned int room)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
-	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+	if (room)
+		return PAGE_SIZE - room;
+
+	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
 				rq->min_buf_len, PAGE_SIZE - hdr_len);
+
 	return ALIGN(len, L1_CACHE_BYTES);
 }
 
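With a non-zero room, the EWMA-based sizing is bypassed entirely: every buffer becomes a fixed PAGE_SIZE - room bytes, so headroom plus an aligned skb_shared_info always fit within one page frag. Rough arithmetic under common x86-64 assumptions (these numbers are illustrative, not taken from the patch):

/* Illustrative arithmetic only (assumed x86-64 values, not from the patch):
 *   headroom = VIRTIO_XDP_HEADROOM            = 256
 *   tailroom = sizeof(struct skb_shared_info) = 320
 *   room     = SKB_DATA_ALIGN(256 + 320)      = 576
 *   len      = PAGE_SIZE - room               = 4096 - 576 = 3520
 * So with an XDP program attached, each mergeable rx buffer is a fixed
 * 3520 bytes instead of the EWMA-derived length.
 */
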
@@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
 	char *buf;
 	void *ctx;
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+	/* Extra tailroom is needed to satisfy XDP's assumption. This
+	 * means rx frags coalescing won't work, but consider we've
+	 * disabled GSO for XDP, it won't be a big issue.
+	 */
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
 	get_page(alloc_frag->page);
-	alloc_frag->offset += len + headroom;
+	alloc_frag->offset += len + room;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < len + headroom) {
+	if (hole < len + room) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
 		 * the current buffer.
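
The comment in the hunk spells out the trade-off: reserving room up front means a frag can no longer be topped up by coalescing, but it guarantees the layout XDP expects. A sketch of the resulting frag layout (offsets are illustrative, not from the patch):

/* Resulting page-frag layout when XDP is enabled (sketch):
 *
 *   |<- headroom ->|<------ packet data (len) ------>|<- tailroom ->|
 *   ^              ^                                                ^
 *   frag offset    buf (posted to the device)         skb_shared_info
 *
 * The headroom lets the XDP program prepend data via
 * bpf_xdp_adjust_head(); the tailroom keeps a later skb conversion
 * from writing shared info past the end of the reserved room.
 */
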
@@ -2185,8 +2212,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	}
 
 	/* Make sure NAPI is not using any XDP TX queues for RX. */
-	for (i = 0; i < vi->max_queue_pairs; i++)
-		napi_disable(&vi->rq[i].napi);
+	if (netif_running(dev))
+		for (i = 0; i < vi->max_queue_pairs; i++)
+			napi_disable(&vi->rq[i].napi);
 
 	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
 	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
@@ -2205,7 +2233,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 		}
 		if (old_prog)
 			bpf_prog_put(old_prog);
-		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+		if (netif_running(dev))
+			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 	}
 
 	return 0;
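
Both the disable and the matching re-enable of the rx NAPIs are now skipped while the interface is down. A hedged note on why (assumed rationale, not stated in the patch): after netif_napi_add() the NAPI's SCHED bit stays set until napi_enable() clears it, so disabling a NAPI that was never enabled would block indefinitely:

/* Assumed rationale, not stated in the patch: netif_napi_add() leaves
 * NAPI_STATE_SCHED set until napi_enable() clears it, so on a device
 * that was never brought up the sequence below would not return from
 * napi_disable().
 */
netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight);
/* device never opened, so napi_enable() is never called */
napi_disable(&vi->rq[i].napi);	/* would wait for SCHED forever */
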
@@ -2576,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
 	struct virtnet_info *vi = netdev_priv(queue->dev);
 	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
 	struct ewma_pkt_len *avg;
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
 	return sprintf(buf, "%u\n",
-		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+				       SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =