author	Mark McLoughlin <markmc@redhat.com>	2008-11-17 01:41:34 -0500
committer	David S. Miller <davem@davemloft.net>	2008-11-17 01:41:34 -0500
commit	3f2c31d90327f21d76d296af34aa4ca547932ff4 (patch)
tree	fbcccc1ab9af94982dd2a98fae6b32c7a0e14e3a
parent	0276b4972e932ea8bf2941dcd37e9caac5652ed7 (diff)
virtio_net: VIRTIO_NET_F_MRG_RXBUF (improve rcv buffer allocation)
If segmentation offload is enabled by the host, we currently allocate
maximum sized packet buffers and pass them to the host. This uses up 20
ring entries, allowing us to supply only 12 packet buffers to the host
with a 256 entry ring. This is a huge overhead when receiving small
packets, and is most keenly felt when receiving MTU sized packets from
off-host.

The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support
using receive buffers which are smaller than the maximum packet size. In
order to transfer large packets to the guest, the host merges together
multiple receive buffers to form a larger logical buffer. The number of
merged buffers is returned to the guest via a field in the
virtio_net_hdr.

Make use of this support by supplying single page receive buffers to the
host. On receive, we extract the virtio_net_hdr, copy 128 bytes of the
payload to the skb's linear data buffer and adjust the fragment offset to
point to the remaining data. This ensures proper alignment and allows us
to not use any paged data for small packets. If the payload occupies
multiple pages, we simply append those pages as fragments and free the
associated skbs. This scheme allows us to be efficient in our use of ring
entries while still supporting large packets.

Benchmarking using netperf from an external machine to a guest over a
10Gb/s network shows a 100% improvement from ~1Gb/s to ~2Gb/s. With a
local host->guest benchmark with GSO disabled on the host side,
throughput was seen to increase from 700Mb/s to 1.7Gb/s.

Based on a patch from Herbert Xu.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (use netdev_priv)
Signed-off-by: David S. Miller <davem@davemloft.net>
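As an illustration of the accounting behind num_buffers (a standalone
userspace sketch, not part of the patch): with single-page receive
buffers, a merging host consumes one PAGE_SIZE descriptor per ~4K of
header plus payload and reports the count back to the guest, versus the
flat 20 descriptors per buffer of the maximum-sized scheme described
above. mrg_buffers_needed() is a hypothetical helper invented for this
sketch; the struct layouts mirror the include/linux/virtio_net.h changes
below.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/* Header layouts as defined in include/linux/virtio_net.h below. */
struct virtio_net_hdr {
	uint8_t  flags;
	uint8_t  gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};

struct virtio_net_hdr_mrg_rxbuf {
	struct virtio_net_hdr hdr;
	uint16_t num_buffers;	/* Number of merged rx buffers */
};

/* How many PAGE_SIZE receive buffers a merging host would consume for a
 * packet of plen bytes; the header shares the first buffer with the data. */
static unsigned int mrg_buffers_needed(size_t plen)
{
	size_t total = sizeof(struct virtio_net_hdr_mrg_rxbuf) + plen;

	return (unsigned int)((total + PAGE_SIZE - 1) / PAGE_SIZE);
}

int main(void)
{
	size_t sizes[] = { 60, 1514, 65535 };
	unsigned int i;

	/* A 1514-byte MTU frame fits in one page; a 64K GSO packet needs 17. */
	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("packet %5zu bytes -> num_buffers = %u\n",
		       sizes[i], mrg_buffers_needed(sizes[i]));
	return 0;
}

So an off-host stream of MTU sized frames now costs one ring entry per
packet instead of 20, which is where the netperf gains quoted above come
from.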
-rw-r--r--	drivers/net/virtio_net.c	173
-rw-r--r--	include/linux/virtio_net.h	9
2 files changed, 162 insertions(+), 20 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 27559c987d47..e6b5d6ef9ea8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -34,6 +34,7 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
+#define GOOD_COPY_LEN	128
 
 struct virtnet_info
 {
@@ -58,6 +59,9 @@ struct virtnet_info
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
+	/* Host will merge rx buffers for big packets (shake it! shake it!) */
+	bool mergeable_rx_bufs;
+
 	/* Receive & send queues. */
 	struct sk_buff_head recv;
 	struct sk_buff_head send;
@@ -66,16 +70,11 @@ struct virtnet_info
 	struct page *pages;
 };
 
-static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
+static inline void *skb_vnet_hdr(struct sk_buff *skb)
 {
 	return (struct virtio_net_hdr *)skb->cb;
 }
 
-static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
-{
-	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
-}
-
 static void give_a_page(struct virtnet_info *vi, struct page *page)
 {
 	page->private = (unsigned long)vi->pages;
@@ -121,25 +120,97 @@ static void skb_xmit_done(struct virtqueue *svq)
 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 			unsigned len)
 {
+	struct virtnet_info *vi = netdev_priv(dev);
 	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
 	int err;
+	int i;
 
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		goto drop;
 	}
-	len -= sizeof(struct virtio_net_hdr);
 
-	if (len <= MAX_PACKET_LEN)
-		trim_pages(netdev_priv(dev), skb);
+	if (vi->mergeable_rx_bufs) {
+		struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb);
+		unsigned int copy;
+		char *p = page_address(skb_shinfo(skb)->frags[0].page);
 
-	err = pskb_trim(skb, len);
-	if (err) {
-		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
-		dev->stats.rx_dropped++;
-		goto drop;
+		if (len > PAGE_SIZE)
+			len = PAGE_SIZE;
+		len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
+
+		memcpy(hdr, p, sizeof(*mhdr));
+		p += sizeof(*mhdr);
+
+		copy = len;
+		if (copy > skb_tailroom(skb))
+			copy = skb_tailroom(skb);
+
+		memcpy(skb_put(skb, copy), p, copy);
+
+		len -= copy;
+
+		if (!len) {
+			give_a_page(vi, skb_shinfo(skb)->frags[0].page);
+			skb_shinfo(skb)->nr_frags--;
+		} else {
+			skb_shinfo(skb)->frags[0].page_offset +=
+				sizeof(*mhdr) + copy;
+			skb_shinfo(skb)->frags[0].size = len;
+			skb->data_len += len;
+			skb->len += len;
+		}
+
+		while (--mhdr->num_buffers) {
+			struct sk_buff *nskb;
+
+			i = skb_shinfo(skb)->nr_frags;
+			if (i >= MAX_SKB_FRAGS) {
+				pr_debug("%s: packet too long %d\n", dev->name,
+					 len);
+				dev->stats.rx_length_errors++;
+				goto drop;
+			}
+
+			nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
+			if (!nskb) {
+				pr_debug("%s: rx error: %d buffers missing\n",
+					 dev->name, mhdr->num_buffers);
+				dev->stats.rx_length_errors++;
+				goto drop;
+			}
+
+			__skb_unlink(nskb, &vi->recv);
+			vi->num--;
+
+			skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
+			skb_shinfo(nskb)->nr_frags = 0;
+			kfree_skb(nskb);
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+
+			skb_shinfo(skb)->frags[i].size = len;
+			skb_shinfo(skb)->nr_frags++;
+			skb->data_len += len;
+			skb->len += len;
+		}
+	} else {
+		len -= sizeof(struct virtio_net_hdr);
+
+		if (len <= MAX_PACKET_LEN)
+			trim_pages(vi, skb);
+
+		err = pskb_trim(skb, len);
+		if (err) {
+			pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
+				 len, err);
+			dev->stats.rx_dropped++;
+			goto drop;
+		}
 	}
+
 	skb->truesize += skb->data_len;
 	dev->stats.rx_bytes += skb->len;
 	dev->stats.rx_packets++;
@@ -198,7 +269,7 @@ drop:
 	dev_kfree_skb(skb);
 }
 
-static void try_fill_recv(struct virtnet_info *vi)
+static void try_fill_recv_maxbufs(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
@@ -206,12 +277,16 @@ static void try_fill_recv(struct virtnet_info *vi)
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
+		struct virtio_net_hdr *hdr;
+
 		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
 		if (unlikely(!skb))
 			break;
 
 		skb_put(skb, MAX_PACKET_LEN);
-		vnet_hdr_to_sg(sg, skb);
+
+		hdr = skb_vnet_hdr(skb);
+		sg_init_one(sg, hdr, sizeof(*hdr));
 
 		if (vi->big_packets) {
 			for (i = 0; i < MAX_SKB_FRAGS; i++) {
@@ -247,6 +322,54 @@ static void try_fill_recv(struct virtnet_info *vi)
 	vi->rvq->vq_ops->kick(vi->rvq);
 }
 
+static void try_fill_recv(struct virtnet_info *vi)
+{
+	struct sk_buff *skb;
+	struct scatterlist sg[1];
+	int err;
+
+	if (!vi->mergeable_rx_bufs) {
+		try_fill_recv_maxbufs(vi);
+		return;
+	}
+
+	for (;;) {
+		skb_frag_t *f;
+
+		skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN);
+		if (unlikely(!skb))
+			break;
+
+		skb_reserve(skb, NET_IP_ALIGN);
+
+		f = &skb_shinfo(skb)->frags[0];
+		f->page = get_a_page(vi, GFP_ATOMIC);
+		if (!f->page) {
+			kfree_skb(skb);
+			break;
+		}
+
+		f->page_offset = 0;
+		f->size = PAGE_SIZE;
+
+		skb_shinfo(skb)->nr_frags++;
+
+		sg_init_one(sg, page_address(f->page), PAGE_SIZE);
+		skb_queue_head(&vi->recv, skb);
+
+		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
+		if (err) {
+			skb_unlink(skb, &vi->recv);
+			kfree_skb(skb);
+			break;
+		}
+		vi->num++;
+	}
+	if (unlikely(vi->num > vi->max))
+		vi->max = vi->num;
+	vi->rvq->vq_ops->kick(vi->rvq);
+}
+
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
@@ -325,15 +448,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 {
 	int num, err;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
-	struct virtio_net_hdr *hdr;
+	struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb);
+	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 
-	/* Encode metadata header at front. */
-	hdr = skb_vnet_hdr(skb);
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 		hdr->csum_start = skb->csum_start - skb_headroom(skb);
@@ -361,7 +483,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 		hdr->gso_size = hdr->hdr_len = 0;
 	}
 
-	vnet_hdr_to_sg(sg, skb);
+	mhdr->num_buffers = 0;
+
+	/* Encode metadata header at front. */
+	if (vi->mergeable_rx_bufs)
+		sg_init_one(sg, mhdr, sizeof(*mhdr));
+	else
+		sg_init_one(sg, hdr, sizeof(*hdr));
+
 	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 
 	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
@@ -551,6 +680,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
 		vi->big_packets = true;
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
+		vi->mergeable_rx_bufs = true;
+
 	/* We expect two virtqueues, receive then send. */
 	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
 	if (IS_ERR(vi->rvq)) {
@@ -643,6 +775,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN,	/* We don't yet handle UFO input. */
+	VIRTIO_NET_F_MRG_RXBUF,
 	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 5e33761b9b8a..5cdd0aa8bde9 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -20,6 +20,7 @@
 #define VIRTIO_NET_F_HOST_TSO6	12	/* Host can handle TSOv6 in. */
 #define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
 #define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
 
 struct virtio_net_config
 {
@@ -44,4 +45,12 @@ struct virtio_net_hdr
 	__u16 csum_start;	/* Position to start checksumming from */
 	__u16 csum_offset;	/* Offset after that to place checksum */
 };
+
+/* This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+	struct virtio_net_hdr hdr;
+	__u16 num_buffers;	/* Number of merged rx buffers */
+};
+
 #endif /* _LINUX_VIRTIO_NET_H */
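A final sketch (hypothetical, not from the patch) of the copy/offset split
that receive_skb() performs on the first merged buffer: up to the skb's
tailroom (GOOD_COPY_LEN bytes as allocated by try_fill_recv()) is copied
into the linear area, and the page fragment is advanced past the header
and the copied bytes, so small packets end up with no paged data at all.
frag_view and split_first_buffer() are stand-ins invented for this sketch;
MRG_HDR_LEN assumes the 12-byte struct virtio_net_hdr_mrg_rxbuf above.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define GOOD_COPY_LEN	128
#define MRG_HDR_LEN	12	/* sizeof(struct virtio_net_hdr_mrg_rxbuf) */

/* Stand-in for the skb's first page fragment. */
struct frag_view {
	size_t offset;		/* frags[0].page_offset after the split */
	size_t size;		/* bytes left in the page fragment */
};

/* 'page' holds header + payload, 'len' is the payload length (header
 * already subtracted); returns the number of bytes copied to 'linear'. */
static size_t split_first_buffer(const char *page, size_t len,
				 char *linear, size_t tailroom,
				 struct frag_view *f)
{
	size_t copy = len > tailroom ? tailroom : len;

	memcpy(linear, page + MRG_HDR_LEN, copy);
	f->offset = MRG_HDR_LEN + copy;	/* skip header + copied bytes */
	f->size = len - copy;		/* 0 for small packets: frag is freed */
	return copy;
}

int main(void)
{
	char page[4096] = { 0 }, linear[GOOD_COPY_LEN];
	struct frag_view f;
	size_t copied = split_first_buffer(page, 60, linear,
					   sizeof(linear), &f);

	/* A 60-byte packet is fully linear: the frag size drops to 0. */
	printf("copied=%zu frag.offset=%zu frag.size=%zu\n",
	       copied, f.offset, f.size);
	return 0;
}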