Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--  drivers/net/virtio_net.c | 257
1 file changed, 164 insertions(+), 93 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5d776447d9c3..5632a99cbbd2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -13,8 +13,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 //#define DEBUG
 #include <linux/netdevice.h>
@@ -27,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/average.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -37,11 +37,18 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
-#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
-                                sizeof(struct virtio_net_hdr_mrg_rxbuf), \
-                                L1_CACHE_BYTES))
 #define GOOD_COPY_LEN	128
 
+/* Weight used for the RX packet size EWMA. The average packet size is used to
+ * determine the packet buffer size when refilling RX rings. As the entire RX
+ * ring may be refilled at once, the weight is chosen so that the EWMA will be
+ * insensitive to short-term, transient changes in packet size.
+ */
+#define RECEIVE_AVG_WEIGHT 64
+
+/* Minimum alignment for mergeable packet buffers. */
+#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
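The RECEIVE_AVG_WEIGHT comment above is the heart of the auto-tuning: each completed packet feeds an exponentially weighted moving average, and refill sizes track that average. A minimal standalone sketch of the arithmetic follows; it omits the fixed-point precision factor that the kernel's <linux/average.h> ewma also takes, and the constants in main() are illustrative, not from the driver:

#include <stdio.h>

struct ewma {
	unsigned long internal; /* running average */
	unsigned long weight;   /* decay divisor, e.g. 64 */
};

static void ewma_init(struct ewma *e, unsigned long weight)
{
	e->internal = 0;
	e->weight = weight;
}

static void ewma_add(struct ewma *e, unsigned long val)
{
	/* avg <- (avg * (weight - 1) + sample) / weight */
	e->internal = e->internal ?
		(e->internal * (e->weight - 1) + val) / e->weight : val;
}

int main(void)
{
	struct ewma avg;
	int i;

	ewma_init(&avg, 64);
	for (i = 0; i < 16; i++)
		ewma_add(&avg, 1500);	/* steady MTU-sized traffic */
	ewma_add(&avg, 9000);		/* one jumbo outlier */
	/* prints 1617: a single outlier nudges the average by ~117 bytes */
	printf("avg = %lu\n", avg.internal);
	return 0;
}

With weight 64 the average moves by only 1/64 of the error per sample, so one burst of oversized (or tiny) packets barely perturbs the buffer size chosen for the next ring refill.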
@@ -73,12 +80,15 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
-	/* Number of input buffers, and max we've ever had. */
-	unsigned int num, max;
-
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
+	/* Average packet length for mergeable receive buffers. */
+	struct ewma mrg_avg_pkt_len;
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* RX: fragments + linear part + virtio header */
 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
@@ -127,11 +137,6 @@ struct virtnet_info {
 	/* Lock for config space updates */
 	struct mutex config_lock;
 
-	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
-	 * low on memory.
-	 */
-	struct page_frag alloc_frag;
-
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -222,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq)
 	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
+{
+	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
+	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
+}
+
+static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
+{
+	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
+}
+
+static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
+{
+	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
+	return (unsigned long)buf | (size - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
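The three helpers above pack a buffer's truesize into the low bits of its address: buffers are handed out at MERGEABLE_BUFFER_ALIGN boundaries, so those bits are otherwise zero, and a single unsigned long token can ride through virtqueue_add_inbuf()/virtqueue_get_buf() as the opaque cookie. A self-contained sketch of the round-trip, assuming the 256-byte alignment case (the names and test values are illustrative, not the driver's):

#include <assert.h>
#include <stdint.h>

#define ALIGN_BYTES 256UL

static uintptr_t buf_to_ctx(void *buf, unsigned int truesize)
{
	/* low bits carry (truesize / align - 1) */
	return (uintptr_t)buf | (truesize / ALIGN_BYTES - 1);
}

static void *ctx_to_addr(uintptr_t ctx)
{
	/* same as the driver's (mrg_ctx & -MERGEABLE_BUFFER_ALIGN) */
	return (void *)(ctx & ~(ALIGN_BYTES - 1));
}

static unsigned int ctx_to_truesize(uintptr_t ctx)
{
	return ((ctx & (ALIGN_BYTES - 1)) + 1) * ALIGN_BYTES;
}

int main(void)
{
	_Alignas(256) static char page[4096];
	void *buf = page + 1536;		/* 256-aligned offset */
	uintptr_t ctx = buf_to_ctx(buf, 1792);	/* truesize, multiple of 256 */

	assert(ctx_to_addr(ctx) == buf);
	assert(ctx_to_truesize(ctx) == 1792);
	return 0;
}

Because the encoding stores truesize/align - 1, even a full-page 4096-byte buffer encodes as 15 and fits comfortably below the 256-byte alignment mask.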
@@ -330,38 +353,34 @@ err:
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct receive_queue *rq,
-					 void *buf,
+					 unsigned long ctx,
 					 unsigned int len)
 {
+	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct skb_vnet_hdr *hdr = buf;
 	int num_buf = hdr->mhdr.num_buffers;
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
-					       MERGE_BUFFER_LEN);
+	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+
+	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
 	struct sk_buff *curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
-
 	while (--num_buf) {
 		int num_skb_frags;
 
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf, hdr->mhdr.num_buffers);
 			dev->stats.rx_length_errors++;
 			goto err_buf;
 		}
-		if (unlikely(len > MERGE_BUFFER_LEN)) {
-			pr_debug("%s: rx error: merge buffer too long\n",
-				 dev->name);
-			len = MERGE_BUFFER_LEN;
-		}
 
+		buf = mergeable_ctx_to_buf_address(ctx);
 		page = virt_to_head_page(buf);
-		--rq->num;
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
 		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -377,37 +396,38 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			head_skb->truesize += nskb->truesize;
 			num_skb_frags = 0;
 		}
+		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
 		if (curr_skb != head_skb) {
 			head_skb->data_len += len;
 			head_skb->len += len;
-			head_skb->truesize += MERGE_BUFFER_LEN;
+			head_skb->truesize += truesize;
 		}
 		offset = buf - page_address(page);
 		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
 			put_page(page);
 			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
-					     len, MERGE_BUFFER_LEN);
+					     len, truesize);
 		} else {
 			skb_add_rx_frag(curr_skb, num_skb_frags, page,
-					offset, len, MERGE_BUFFER_LEN);
+					offset, len, truesize);
 		}
 	}
 
+	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
 err_skb:
 	put_page(page);
 	while (--num_buf) {
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 dev->name, num_buf);
 			dev->stats.rx_length_errors++;
 			break;
 		}
-		page = virt_to_head_page(buf);
+		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
 		put_page(page);
-		--rq->num;
 	}
 err_buf:
 	dev->stats.rx_dropped++;
@@ -426,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
-		if (vi->mergeable_rx_bufs)
-			put_page(virt_to_head_page(buf));
-		else if (vi->big_packets)
+		if (vi->mergeable_rx_bufs) {
+			unsigned long ctx = (unsigned long)buf;
+			void *base = mergeable_ctx_to_buf_address(ctx);
+			put_page(virt_to_head_page(base));
+		} else if (vi->big_packets) {
 			give_pages(rq, buf);
-		else
+		} else {
 			dev_kfree_skb(buf);
+		}
 		return;
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, rq, buf, len);
+		skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, rq, buf, len);
 	else
@@ -577,28 +600,45 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 	return err;
 }
 
+static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
+{
+	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int len;
+
+	len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
+				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+}
+
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-	struct virtnet_info *vi = rq->vq->vdev->priv;
-	char *buf = NULL;
+	struct page_frag *alloc_frag = &rq->alloc_frag;
+	char *buf;
+	unsigned long ctx;
 	int err;
+	unsigned int len, hole;
 
-	if (gfp & __GFP_WAIT) {
-		if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
-					 gfp)) {
-			buf = (char *)page_address(vi->alloc_frag.page) +
-			      vi->alloc_frag.offset;
-			get_page(vi->alloc_frag.page);
-			vi->alloc_frag.offset += MERGE_BUFFER_LEN;
-		}
-	} else {
-		buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
-	}
-	if (!buf)
+	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
 		return -ENOMEM;
 
-	sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	ctx = mergeable_buf_to_ctx(buf, len);
+	get_page(alloc_frag->page);
+	alloc_frag->offset += len;
+	hole = alloc_frag->size - alloc_frag->offset;
+	if (hole < len) {
+		/* To avoid internal fragmentation, if there is very likely not
+		 * enough space for another buffer, add the remaining space to
+		 * the current buffer. This extra space is not included in
+		 * the truesize stored in ctx.
+		 */
+		len += hole;
+		alloc_frag->offset += hole;
+	}
+
+	sg_init_one(rq->sg, buf, len);
+	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
 
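get_mergeable_buf_len() above turns the EWMA into an actual allocation size: clamp the average to [GOOD_PACKET_LEN, PAGE_SIZE - hdr_len], prepend the mergeable header, and round up to the buffer alignment. A standalone sketch with worked numbers follows; the constants assume a 4096-byte page and the 12-byte virtio_net_hdr_mrg_rxbuf, and clamp()/the masking expression are userspace stand-ins for the kernel's clamp_t()/ALIGN():

#include <stdio.h>

#define PAGE_SIZE 4096u
#define HDR_LEN 12u
#define GOOD_PACKET_LEN 1518u	/* ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN */
#define BUF_ALIGN 256u

static unsigned int clamp(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static unsigned int mergeable_buf_len(unsigned int avg_pkt_len)
{
	unsigned int len = HDR_LEN +
		clamp(avg_pkt_len, GOOD_PACKET_LEN, PAGE_SIZE - HDR_LEN);
	return (len + BUF_ALIGN - 1) & ~(BUF_ALIGN - 1);	/* ALIGN() */
}

int main(void)
{
	/* small-packet workload: buffers shrink toward ~1.5K */
	printf("%u\n", mergeable_buf_len(256));		/* 1536 */
	/* large-packet workload: buffers grow toward a full page */
	printf("%u\n", mergeable_buf_len(60000));	/* 4096 */
	return 0;
}

add_recvbuf_mergeable() then rounds the last buffer of a page frag up to swallow any tail hole smaller than one buffer rather than leave unusable slack; since that extra space is not encoded in the context, the receive path compensates by charging max(len, truesize) against skb memory.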
@@ -618,6 +658,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 	int err;
 	bool oom;
 
+	gfp |= __GFP_COLD;
 	do {
 		if (vi->mergeable_rx_bufs)
 			err = add_recvbuf_mergeable(rq, gfp);
@@ -629,10 +670,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 		oom = err == -ENOMEM;
 		if (err)
 			break;
-		++rq->num;
 	} while (rq->vq->num_free);
-	if (unlikely(rq->num > rq->max))
-		rq->max = rq->num;
 	if (unlikely(!virtqueue_kick(rq->vq)))
 		return false;
 	return !oom;
@@ -700,11 +738,10 @@ again:
 	while (received < budget &&
 	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
 		receive_buf(rq, buf, len);
-		--rq->num;
 		received++;
 	}
 
-	if (rq->num < rq->max / 2) {
+	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
 		if (!try_fill_recv(rq, GFP_ATOMIC))
 			schedule_delayed_work(&vi->refill, 0);
 	}
@@ -874,16 +911,15 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 /*
  * Send command via the control virtqueue and check status. Commands
  * supported by the hypervisor, as indicated by feature bits, should
- * never fail unless improperly formated.
+ * never fail unless improperly formatted.
  */
 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
-				 struct scatterlist *out,
-				 struct scatterlist *in)
+				 struct scatterlist *out)
 {
 	struct scatterlist *sgs[4], hdr, stat;
 	struct virtio_net_ctrl_hdr ctrl;
 	virtio_net_ctrl_ack status = ~0;
-	unsigned out_num = 0, in_num = 0, tmp;
+	unsigned out_num = 0, tmp;
 
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
@@ -896,16 +932,13 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 
 	if (out)
 		sgs[out_num++] = out;
-	if (in)
-		sgs[out_num + in_num++] = in;
 
 	/* Add return status. */
 	sg_init_one(&stat, &status, sizeof(status));
-	sgs[out_num + in_num++] = &stat;
+	sgs[out_num] = &stat;
 
-	BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
-	BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC)
-	       < 0);
+	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
+	BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC) < 0);
 
 	if (unlikely(!virtqueue_kick(vi->cvq)))
 		return status == VIRTIO_NET_OK;
@@ -935,8 +968,7 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
 		sg_init_one(&sg, addr->sa_data, dev->addr_len);
 		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
-					  VIRTIO_NET_CTRL_MAC_ADDR_SET,
-					  &sg, NULL)) {
+					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
 			dev_warn(&vdev->dev,
 				 "Failed to set mac address by vq command.\n");
 			return -EINVAL;
@@ -1009,7 +1041,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
 {
 	rtnl_lock();
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
-				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL, NULL))
+				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
 		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
 	rtnl_unlock();
 }
@@ -1027,7 +1059,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 	sg_init_one(&sg, &s, sizeof(s));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
-				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, NULL)) {
+				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
 		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
 			 queue_pairs);
 		return -EINVAL;
@@ -1067,7 +1099,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	void *buf;
 	int i;
 
-	/* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
+	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
@@ -1077,16 +1109,14 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	sg_init_one(sg, &promisc, sizeof(promisc));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
-				  VIRTIO_NET_CTRL_RX_PROMISC,
-				  sg, NULL))
+				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
 			 promisc ? "en" : "dis");
 
 	sg_init_one(sg, &allmulti, sizeof(allmulti));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
-				  VIRTIO_NET_CTRL_RX_ALLMULTI,
-				  sg, NULL))
+				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 allmulti ? "en" : "dis");
 
@@ -1122,8 +1152,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
-				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
-				  sg, NULL))
+				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
 	kfree(buf);
@@ -1138,7 +1167,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev,
 	sg_init_one(&sg, &vid, sizeof(vid));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
-				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, NULL))
+				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
 		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
 	return 0;
 }
@@ -1152,7 +1181,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
 	sg_init_one(&sg, &vid, sizeof(vid));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
-				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, NULL))
+				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
 		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
 	return 0;
 }
@@ -1386,6 +1415,14 @@ static void free_receive_bufs(struct virtnet_info *vi)
 	}
 }
 
+static void free_receive_page_frags(struct virtnet_info *vi)
+{
+	int i;
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		if (vi->rq[i].alloc_frag.page)
+			put_page(vi->rq[i].alloc_frag.page);
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -1401,15 +1438,16 @@ static void free_unused_bufs(struct virtnet_info *vi)
 		struct virtqueue *vq = vi->rq[i].vq;
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs)
-				put_page(virt_to_head_page(buf));
-			else if (vi->big_packets)
+			if (vi->mergeable_rx_bufs) {
+				unsigned long ctx = (unsigned long)buf;
+				void *base = mergeable_ctx_to_buf_address(ctx);
+				put_page(virt_to_head_page(base));
+			} else if (vi->big_packets) {
 				give_pages(&vi->rq[i], buf);
-			else
+			} else {
 				dev_kfree_skb(buf);
-			--vi->rq[i].num;
+			}
 		}
-		BUG_ON(vi->rq[i].num != 0);
 	}
 }
 
@@ -1516,6 +1554,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 			       napi_weight);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
 	}
 
@@ -1552,6 +1591,33 @@ err:
 	return ret;
 }
 
+#ifdef CONFIG_SYSFS
+static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
+		struct rx_queue_attribute *attribute, char *buf)
+{
+	struct virtnet_info *vi = netdev_priv(queue->dev);
+	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	struct ewma *avg;
+
+	BUG_ON(queue_index >= vi->max_queue_pairs);
+	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+}
+
+static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
+	__ATTR_RO(mergeable_rx_buffer_size);
+
+static struct attribute *virtio_net_mrg_rx_attrs[] = {
+	&mergeable_rx_buffer_size_attribute.attr,
+	NULL
+};
+
+static const struct attribute_group virtio_net_mrg_rx_group = {
+	.name = "virtio_net",
+	.attrs = virtio_net_mrg_rx_attrs
+};
+#endif
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err;
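With dev->sysfs_rx_queue_group pointed at the named group above (see the probe hunk below), the per-queue attribute should surface under the standard netdev rx-queue sysfs layout as /sys/class/net/<dev>/queues/rx-<n>/virtio_net/mergeable_rx_buffer_size, letting userspace observe the EWMA-derived buffer size each receive queue is currently refilling with.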
@@ -1645,7 +1711,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
-	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
 		vi->big_packets = true;
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
@@ -1666,6 +1733,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (err)
 		goto free_stats;
 
+#ifdef CONFIG_SYSFS
+	if (vi->mergeable_rx_bufs)
+		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
+#endif
 	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
 	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
@@ -1680,7 +1751,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		try_fill_recv(&vi->rq[i], GFP_KERNEL);
 
 		/* If we didn't even get one input buffer, we're useless. */
-		if (vi->rq[i].num == 0) {
+		if (vi->rq[i].vq->num_free ==
+		    virtqueue_get_vring_size(vi->rq[i].vq)) {
 			free_unused_bufs(vi);
 			err = -ENOMEM;
 			goto free_recv_bufs;
@@ -1714,9 +1786,8 @@ free_recv_bufs:
 	unregister_netdev(dev);
 free_vqs:
 	cancel_delayed_work_sync(&vi->refill);
+	free_receive_page_frags(vi);
 	virtnet_del_vqs(vi);
-	if (vi->alloc_frag.page)
-		put_page(vi->alloc_frag.page);
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -1733,6 +1804,8 @@ static void remove_vq_common(struct virtnet_info *vi)
 
 	free_receive_bufs(vi);
 
+	free_receive_page_frags(vi);
+
 	virtnet_del_vqs(vi);
 }
 
@@ -1750,8 +1823,6 @@ static void virtnet_remove(struct virtio_device *vdev)
 	unregister_netdev(vi->dev);
 
 	remove_vq_common(vi);
-	if (vi->alloc_frag.page)
-		put_page(vi->alloc_frag.page);
 
 	flush_work(&vi->config_work);
 