Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--	drivers/net/virtio_net.c | 257
1 file changed, 164 insertions(+), 93 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5d776447d9c3..5632a99cbbd2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -13,8 +13,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 //#define DEBUG
 #include <linux/netdevice.h>
@@ -27,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/average.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -37,11 +37,18 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
-#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
-                                sizeof(struct virtio_net_hdr_mrg_rxbuf), \
-                                L1_CACHE_BYTES))
 #define GOOD_COPY_LEN 128
 
+/* Weight used for the RX packet size EWMA. The average packet size is used to
+ * determine the packet buffer size when refilling RX rings. As the entire RX
+ * ring may be refilled at once, the weight is chosen so that the EWMA will be
+ * insensitive to short-term, transient changes in packet size.
+ */
+#define RECEIVE_AVG_WEIGHT 64
+
+/* Minimum alignment for mergeable packet buffers. */
+#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
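
Note on the new EWMA defines: the driver initializes this average with factor 1 and weight RECEIVE_AVG_WEIGHT (see the ewma_init() call later in this patch). A minimal userspace sketch of the arithmetic behind a factor/weight EWMA like the one in <linux/average.h> follows; the struct and helper names here are invented for illustration. With weight 64, one sample moves the average by roughly 1/64 of the difference, so a burst of atypical packet sizes during a single ring refill barely disturbs the buffer-size estimate.

    /* ewma_sketch: userspace model of a factor/weight EWMA (not kernel code) */
    struct ewma_sketch {
            unsigned long internal; /* average, scaled by factor */
            unsigned long factor;   /* fixed-point scale, e.g. 1 */
            unsigned long weight;   /* smoothing divisor, e.g. 64 */
    };

    static void ewma_sketch_add(struct ewma_sketch *e, unsigned long val)
    {
            /* first sample seeds the average; later samples decay by weight */
            e->internal = e->internal ?
                    (e->internal * (e->weight - 1) + val * e->factor) / e->weight :
                    val * e->factor;
    }

    static unsigned long ewma_sketch_read(const struct ewma_sketch *e)
    {
            return e->internal / e->factor;
    }
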
@@ -73,12 +80,15 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
-	/* Number of input buffers, and max we've ever had. */
-	unsigned int num, max;
-
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
+	/* Average packet length for mergeable receive buffers. */
+	struct ewma mrg_avg_pkt_len;
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* RX: fragments + linear part + virtio header */
 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
@@ -127,11 +137,6 @@ struct virtnet_info {
 	/* Lock for config space updates */
 	struct mutex config_lock;
 
-	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
-	 * low on memory.
-	 */
-	struct page_frag alloc_frag;
-
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -222,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq)
 	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
+{
+	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
+	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
+}
+
+static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
+{
+	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
+
+}
+
+static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
+{
+	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
+	return (unsigned long)buf | (size - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
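
The three helpers above pack a buffer address and its truesize into one unsigned long: alignment to MERGEABLE_BUFFER_ALIGN guarantees the low bits of the address are zero, so they can carry the size class, (truesize / align) - 1. A self-contained userspace round-trip of the same bit trick, assuming a 256-byte alignment (the sketch's names and constant are stand-ins, not the driver's):

    #include <assert.h>
    #include <stdlib.h>

    #define ALIGN_BYTES 256UL

    int main(void)
    {
            unsigned long ctx, truesize = 4 * ALIGN_BYTES;
            void *buf;

            /* a 256-byte-aligned buffer has 8 free low address bits */
            assert(posix_memalign(&buf, ALIGN_BYTES, truesize) == 0);

            /* pack: address in the high bits, size class in the low bits */
            ctx = (unsigned long)buf | (truesize / ALIGN_BYTES - 1);

            /* unpack: both fields survive the round trip */
            assert((void *)(ctx & -ALIGN_BYTES) == buf);
            assert(((ctx & (ALIGN_BYTES - 1)) + 1) * ALIGN_BYTES == truesize);

            free(buf);
            return 0;
    }
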
@@ -330,38 +353,34 @@ err:
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct receive_queue *rq,
-					 void *buf,
+					 unsigned long ctx,
 					 unsigned int len)
 {
+	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct skb_vnet_hdr *hdr = buf;
 	int num_buf = hdr->mhdr.num_buffers;
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
-					       MERGE_BUFFER_LEN);
+	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+
+	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
 	struct sk_buff *curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
-
 	while (--num_buf) {
 		int num_skb_frags;
 
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf, hdr->mhdr.num_buffers);
 			dev->stats.rx_length_errors++;
 			goto err_buf;
 		}
-		if (unlikely(len > MERGE_BUFFER_LEN)) {
-			pr_debug("%s: rx error: merge buffer too long\n",
-				 dev->name);
-			len = MERGE_BUFFER_LEN;
-		}
 
+		buf = mergeable_ctx_to_buf_address(ctx);
 		page = virt_to_head_page(buf);
-		--rq->num;
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
 		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -377,37 +396,38 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			head_skb->truesize += nskb->truesize;
 			num_skb_frags = 0;
 		}
+		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
 		if (curr_skb != head_skb) {
 			head_skb->data_len += len;
 			head_skb->len += len;
-			head_skb->truesize += MERGE_BUFFER_LEN;
+			head_skb->truesize += truesize;
 		}
 		offset = buf - page_address(page);
 		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
 			put_page(page);
 			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
-					     len, MERGE_BUFFER_LEN);
+					     len, truesize);
 		} else {
 			skb_add_rx_frag(curr_skb, num_skb_frags, page,
-					offset, len, MERGE_BUFFER_LEN);
+					offset, len, truesize);
 		}
 	}
 
+	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
 err_skb:
 	put_page(page);
 	while (--num_buf) {
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 dev->name, num_buf);
 			dev->stats.rx_length_errors++;
 			break;
 		}
-		page = virt_to_head_page(buf);
+		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
 		put_page(page);
-		--rq->num;
 	}
 err_buf:
 	dev->stats.rx_dropped++;
@@ -426,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
-		if (vi->mergeable_rx_bufs)
-			put_page(virt_to_head_page(buf));
-		else if (vi->big_packets)
+		if (vi->mergeable_rx_bufs) {
+			unsigned long ctx = (unsigned long)buf;
+			void *base = mergeable_ctx_to_buf_address(ctx);
+			put_page(virt_to_head_page(base));
+		} else if (vi->big_packets) {
 			give_pages(rq, buf);
-		else
+		} else {
 			dev_kfree_skb(buf);
+		}
 		return;
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, rq, buf, len);
+		skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, rq, buf, len);
 	else
@@ -577,28 +600,45 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 	return err;
 }
 
+static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
+{
+	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int len;
+
+	len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
+				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+}
+
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-	struct virtnet_info *vi = rq->vq->vdev->priv;
-	char *buf = NULL;
+	struct page_frag *alloc_frag = &rq->alloc_frag;
+	char *buf;
+	unsigned long ctx;
 	int err;
+	unsigned int len, hole;
 
-	if (gfp & __GFP_WAIT) {
-		if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
-					 gfp)) {
-			buf = (char *)page_address(vi->alloc_frag.page) +
-			      vi->alloc_frag.offset;
-			get_page(vi->alloc_frag.page);
-			vi->alloc_frag.offset += MERGE_BUFFER_LEN;
-		}
-	} else {
-		buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
-	}
-	if (!buf)
+	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
 		return -ENOMEM;
 
-	sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	ctx = mergeable_buf_to_ctx(buf, len);
+	get_page(alloc_frag->page);
+	alloc_frag->offset += len;
+	hole = alloc_frag->size - alloc_frag->offset;
+	if (hole < len) {
+		/* To avoid internal fragmentation, if there is very likely not
+		 * enough space for another buffer, add the remaining space to
+		 * the current buffer. This extra space is not included in
+		 * the truesize stored in ctx.
+		 */
+		len += hole;
+		alloc_frag->offset += hole;
+	}
+
+	sg_init_one(rq->sg, buf, len);
+	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
 
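
For a feel of what get_mergeable_buf_len() returns, here is a standalone model of the computation; it assumes 4096-byte pages, a 256-byte MERGEABLE_BUFFER_ALIGN, and the 12-byte struct virtio_net_hdr_mrg_rxbuf, and the function names are invented for the sketch. An average packet size of 1500 clamps up to GOOD_PACKET_LEN (1518), gains the 12-byte header, and rounds up to 1536 bytes per buffer; large averages saturate at one page.

    #include <stdio.h>

    #define PAGE_SZ         4096u
    #define ALIGN_BYTES     256u
    #define HDR_LEN         12u     /* sizeof(struct virtio_net_hdr_mrg_rxbuf) */
    #define GOOD_PKT_LEN    1518u   /* ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN */

    static unsigned int clamp_u(unsigned int v, unsigned int lo, unsigned int hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    static unsigned int mergeable_buf_len(unsigned int avg_pkt_len)
    {
            unsigned int len = HDR_LEN +
                    clamp_u(avg_pkt_len, GOOD_PKT_LEN, PAGE_SZ - HDR_LEN);
            return (len + ALIGN_BYTES - 1) & ~(ALIGN_BYTES - 1);
    }

    int main(void)
    {
            printf("%u\n", mergeable_buf_len(1500)); /* 1536 */
            printf("%u\n", mergeable_buf_len(9000)); /* 4096: one page */
            return 0;
    }
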
@@ -618,6 +658,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 	int err;
 	bool oom;
 
+	gfp |= __GFP_COLD;
 	do {
 		if (vi->mergeable_rx_bufs)
 			err = add_recvbuf_mergeable(rq, gfp);
@@ -629,10 +670,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 		oom = err == -ENOMEM;
 		if (err)
 			break;
-		++rq->num;
 	} while (rq->vq->num_free);
-	if (unlikely(rq->num > rq->max))
-		rq->max = rq->num;
 	if (unlikely(!virtqueue_kick(rq->vq)))
 		return false;
 	return !oom;
@@ -700,11 +738,10 @@ again:
 	while (received < budget &&
 	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
 		receive_buf(rq, buf, len);
-		--rq->num;
 		received++;
 	}
 
-	if (rq->num < rq->max / 2) {
+	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
 		if (!try_fill_recv(rq, GFP_ATOMIC))
 			schedule_delayed_work(&vi->refill, 0);
 	}
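
The refill test no longer needs the hand-maintained rq->num and rq->max counters: with max equal to the vring size and num equal to vring size minus num_free, the old condition num < max / 2 is equivalent to num_free > vring size / 2, so the virtqueue's own bookkeeping answers the question directly.
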
@@ -874,16 +911,15 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 /*
  * Send command via the control virtqueue and check status. Commands
  * supported by the hypervisor, as indicated by feature bits, should
- * never fail unless improperly formated.
+ * never fail unless improperly formatted.
  */
 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
-				 struct scatterlist *out,
-				 struct scatterlist *in)
+				 struct scatterlist *out)
 {
 	struct scatterlist *sgs[4], hdr, stat;
 	struct virtio_net_ctrl_hdr ctrl;
 	virtio_net_ctrl_ack status = ~0;
 	unsigned out_num = 0, tmp;
 
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
@@ -896,16 +932,13 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 
 	if (out)
 		sgs[out_num++] = out;
-	if (in)
-		sgs[out_num + in_num++] = in;
 
 	/* Add return status. */
 	sg_init_one(&stat, &status, sizeof(status));
-	sgs[out_num + in_num++] = &stat;
+	sgs[out_num] = &stat;
 
-	BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
-	BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC)
-	       < 0);
+	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
+	BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC) < 0);
 
 	if (unlikely(!virtqueue_kick(vi->cvq)))
 		return status == VIRTIO_NET_OK;
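
Dropping the never-used in scatterlist fixes the control-queue layout at no more than three entries: the command header, an optional payload, and the single device-writable status byte. A userspace sketch of how sgs[] is assembled (the struct and labels are stand-ins for illustration):

    #include <stdio.h>

    struct sg { const char *what; };

    int main(void)
    {
            struct sg hdr  = { "ctrl header (driver->device)" };
            struct sg out  = { "command payload (driver->device)" };
            struct sg stat = { "status ack (device->driver)" };
            struct sg *sgs[4];
            unsigned out_num = 0;
            int have_payload = 1;   /* e.g. a MAC table or MQ command */

            sgs[out_num++] = &hdr;
            if (have_payload)
                    sgs[out_num++] = &out;
            sgs[out_num] = &stat;   /* always exactly one "in" entry */

            /* corresponds to: virtqueue_add_sgs(cvq, sgs, out_num, 1, ...) */
            for (unsigned i = 0; i <= out_num; i++)
                    printf("sgs[%u]: %s\n", i, sgs[i]->what);
            return 0;
    }
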
@@ -935,8 +968,7 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
 		sg_init_one(&sg, addr->sa_data, dev->addr_len);
 		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
-					  VIRTIO_NET_CTRL_MAC_ADDR_SET,
-					  &sg, NULL)) {
+					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
 			dev_warn(&vdev->dev,
 				 "Failed to set mac address by vq command.\n");
 			return -EINVAL;
@@ -1009,7 +1041,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
 {
 	rtnl_lock();
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
-				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL, NULL))
+				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
 		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
 	rtnl_unlock();
 }
@@ -1027,7 +1059,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 	sg_init_one(&sg, &s, sizeof(s));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
-				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, NULL)) {
+				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
 		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
 			 queue_pairs);
 		return -EINVAL;
@@ -1067,7 +1099,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	void *buf;
 	int i;
 
-	/* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
+	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
@@ -1077,16 +1109,14 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	sg_init_one(sg, &promisc, sizeof(promisc));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
-				  VIRTIO_NET_CTRL_RX_PROMISC,
-				  sg, NULL))
+				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
 			 promisc ? "en" : "dis");
 
 	sg_init_one(sg, &allmulti, sizeof(allmulti));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
-				  VIRTIO_NET_CTRL_RX_ALLMULTI,
-				  sg, NULL))
+				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 allmulti ? "en" : "dis");
 
@@ -1122,8 +1152,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
-				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
-				  sg, NULL))
+				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
 	kfree(buf);
@@ -1138,7 +1167,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev,
 	sg_init_one(&sg, &vid, sizeof(vid));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
-				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, NULL))
+				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
 		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
 	return 0;
 }
@@ -1152,7 +1181,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
 	sg_init_one(&sg, &vid, sizeof(vid));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
-				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, NULL))
+				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
 		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
 	return 0;
 }
@@ -1386,6 +1415,14 @@ static void free_receive_bufs(struct virtnet_info *vi)
 	}
 }
 
+static void free_receive_page_frags(struct virtnet_info *vi)
+{
+	int i;
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		if (vi->rq[i].alloc_frag.page)
+			put_page(vi->rq[i].alloc_frag.page);
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -1401,15 +1438,16 @@ static void free_unused_bufs(struct virtnet_info *vi)
 		struct virtqueue *vq = vi->rq[i].vq;
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs)
-				put_page(virt_to_head_page(buf));
-			else if (vi->big_packets)
+			if (vi->mergeable_rx_bufs) {
+				unsigned long ctx = (unsigned long)buf;
+				void *base = mergeable_ctx_to_buf_address(ctx);
+				put_page(virt_to_head_page(base));
+			} else if (vi->big_packets) {
 				give_pages(&vi->rq[i], buf);
-			else
+			} else {
 				dev_kfree_skb(buf);
-			--vi->rq[i].num;
+			}
 		}
-		BUG_ON(vi->rq[i].num != 0);
 	}
 }
 
@@ -1516,6 +1554,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 			       napi_weight);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
 	}
 
@@ -1552,6 +1591,33 @@ err:
 	return ret;
 }
 
+#ifdef CONFIG_SYSFS
+static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
+		struct rx_queue_attribute *attribute, char *buf)
+{
+	struct virtnet_info *vi = netdev_priv(queue->dev);
+	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	struct ewma *avg;
+
+	BUG_ON(queue_index >= vi->max_queue_pairs);
+	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+}
+
+static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
+	__ATTR_RO(mergeable_rx_buffer_size);
+
+static struct attribute *virtio_net_mrg_rx_attrs[] = {
+	&mergeable_rx_buffer_size_attribute.attr,
+	NULL
+};
+
+static const struct attribute_group virtio_net_mrg_rx_group = {
+	.name = "virtio_net",
+	.attrs = virtio_net_mrg_rx_attrs
+};
+#endif
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err;
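
With mergeable buffers enabled, this read-only attribute should show up in sysfs under each RX queue, at a path of the form /sys/class/net/<dev>/queues/rx-<N>/virtio_net/mergeable_rx_buffer_size (the "virtio_net" directory comes from the attribute_group's .name above), letting userspace observe the current per-queue buffer-size estimate.
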
@@ -1645,7 +1711,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
-	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
 		vi->big_packets = true;
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
@@ -1666,6 +1733,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (err)
 		goto free_stats;
 
+#ifdef CONFIG_SYSFS
+	if (vi->mergeable_rx_bufs)
+		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
+#endif
 	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
 	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
@@ -1680,7 +1751,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		try_fill_recv(&vi->rq[i], GFP_KERNEL);
 
 		/* If we didn't even get one input buffer, we're useless. */
-		if (vi->rq[i].num == 0) {
+		if (vi->rq[i].vq->num_free ==
+		    virtqueue_get_vring_size(vi->rq[i].vq)) {
 			free_unused_bufs(vi);
 			err = -ENOMEM;
 			goto free_recv_bufs;
@@ -1714,9 +1786,8 @@ free_recv_bufs:
 	unregister_netdev(dev);
 free_vqs:
 	cancel_delayed_work_sync(&vi->refill);
+	free_receive_page_frags(vi);
 	virtnet_del_vqs(vi);
-	if (vi->alloc_frag.page)
-		put_page(vi->alloc_frag.page);
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -1733,6 +1804,8 @@ static void remove_vq_common(struct virtnet_info *vi)
 
 	free_receive_bufs(vi);
 
+	free_receive_page_frags(vi);
+
 	virtnet_del_vqs(vi);
 }
 
@@ -1750,8 +1823,6 @@ static void virtnet_remove(struct virtio_device *vdev)
 	unregister_netdev(vi->dev);
 
 	remove_vq_common(vi);
-	if (vi->alloc_frag.page)
-		put_page(vi->alloc_frag.page);
 
 	flush_work(&vi->config_work);
 