diff options
author | Mark McLoughlin <markmc@redhat.com> | 2008-11-17 01:41:34 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-17 01:41:34 -0500 |
commit | 3f2c31d90327f21d76d296af34aa4ca547932ff4 (patch) | |
tree | fbcccc1ab9af94982dd2a98fae6b32c7a0e14e3a | |
parent | 0276b4972e932ea8bf2941dcd37e9caac5652ed7 (diff) |
virtio_net: VIRTIO_NET_F_MRG_RXBUF (improve rcv buffer allocation)
If segmentation offload is enabled by the host, we currently allocate
maximum sized packet buffers and pass them to the host. This uses up
20 ring entries, allowing us to supply only 20 packet buffers to the
host with a 256 entry ring. This is a huge overhead when receiving
small packets, and is most keenly felt when receiving MTU sized
packets from off-host.
The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support
using receive buffers which are smaller than the maximum packet size.
In order to transfer large packets to the guest, the host merges
together multiple receive buffers to form a larger logical buffer.
The number of merged buffers is returned to the guest via a field in
the virtio_net_hdr.
Make use of this support by supplying single page receive buffers to
the host. On receive, we extract the virtio_net_hdr, copy 128 bytes of
the payload to the skb's linear data buffer and adjust the fragment
offset to point to the remaining data. This ensures proper alignment
and allows us to not use any paged data for small packets. If the
payload occupies multiple pages, we simply append those pages as
fragments and free the associated skbs.
This scheme allows us to be efficient in our use of ring entries
while still supporting large packets. Benchmarking using netperf from
an external machine to a guest over a 10Gb/s network shows a 100%
improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark
with GSO disabled on the host side, throughput was seen to increase
from 700Mb/s to 1.7Gb/s.
Based on a patch from Herbert Xu.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (use netdev_priv)
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/virtio_net.c | 173 | ||||
-rw-r--r-- | include/linux/virtio_net.h | 9 |
2 files changed, 162 insertions, 20 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 27559c987d47..e6b5d6ef9ea8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c | |||
@@ -34,6 +34,7 @@ module_param(gso, bool, 0444); | |||
34 | 34 | ||
35 | /* FIXME: MTU in config. */ | 35 | /* FIXME: MTU in config. */ |
36 | #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN) | 36 | #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN) |
37 | #define GOOD_COPY_LEN 128 | ||
37 | 38 | ||
38 | struct virtnet_info | 39 | struct virtnet_info |
39 | { | 40 | { |
@@ -58,6 +59,9 @@ struct virtnet_info | |||
58 | /* I like... big packets and I cannot lie! */ | 59 | /* I like... big packets and I cannot lie! */ |
59 | bool big_packets; | 60 | bool big_packets; |
60 | 61 | ||
62 | /* Host will merge rx buffers for big packets (shake it! shake it!) */ | ||
63 | bool mergeable_rx_bufs; | ||
64 | |||
61 | /* Receive & send queues. */ | 65 | /* Receive & send queues. */ |
62 | struct sk_buff_head recv; | 66 | struct sk_buff_head recv; |
63 | struct sk_buff_head send; | 67 | struct sk_buff_head send; |
@@ -66,16 +70,11 @@ struct virtnet_info | |||
66 | struct page *pages; | 70 | struct page *pages; |
67 | }; | 71 | }; |
68 | 72 | ||
69 | static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) | 73 | static inline void *skb_vnet_hdr(struct sk_buff *skb) |
70 | { | 74 | { |
71 | return (struct virtio_net_hdr *)skb->cb; | 75 | return (struct virtio_net_hdr *)skb->cb; |
72 | } | 76 | } |
73 | 77 | ||
74 | static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) | ||
75 | { | ||
76 | sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); | ||
77 | } | ||
78 | |||
79 | static void give_a_page(struct virtnet_info *vi, struct page *page) | 78 | static void give_a_page(struct virtnet_info *vi, struct page *page) |
80 | { | 79 | { |
81 | page->private = (unsigned long)vi->pages; | 80 | page->private = (unsigned long)vi->pages; |
@@ -121,25 +120,97 @@ static void skb_xmit_done(struct virtqueue *svq) | |||
121 | static void receive_skb(struct net_device *dev, struct sk_buff *skb, | 120 | static void receive_skb(struct net_device *dev, struct sk_buff *skb, |
122 | unsigned len) | 121 | unsigned len) |
123 | { | 122 | { |
123 | struct virtnet_info *vi = netdev_priv(dev); | ||
124 | struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); | 124 | struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); |
125 | int err; | 125 | int err; |
126 | int i; | ||
126 | 127 | ||
127 | if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { | 128 | if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { |
128 | pr_debug("%s: short packet %i\n", dev->name, len); | 129 | pr_debug("%s: short packet %i\n", dev->name, len); |
129 | dev->stats.rx_length_errors++; | 130 | dev->stats.rx_length_errors++; |
130 | goto drop; | 131 | goto drop; |
131 | } | 132 | } |
132 | len -= sizeof(struct virtio_net_hdr); | ||
133 | 133 | ||
134 | if (len <= MAX_PACKET_LEN) | 134 | if (vi->mergeable_rx_bufs) { |
135 | trim_pages(netdev_priv(dev), skb); | 135 | struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); |
136 | unsigned int copy; | ||
137 | char *p = page_address(skb_shinfo(skb)->frags[0].page); | ||
136 | 138 | ||
137 | err = pskb_trim(skb, len); | 139 | if (len > PAGE_SIZE) |
138 | if (err) { | 140 | len = PAGE_SIZE; |
139 | pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); | 141 | len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); |
140 | dev->stats.rx_dropped++; | 142 | |
141 | goto drop; | 143 | memcpy(hdr, p, sizeof(*mhdr)); |
144 | p += sizeof(*mhdr); | ||
145 | |||
146 | copy = len; | ||
147 | if (copy > skb_tailroom(skb)) | ||
148 | copy = skb_tailroom(skb); | ||
149 | |||
150 | memcpy(skb_put(skb, copy), p, copy); | ||
151 | |||
152 | len -= copy; | ||
153 | |||
154 | if (!len) { | ||
155 | give_a_page(vi, skb_shinfo(skb)->frags[0].page); | ||
156 | skb_shinfo(skb)->nr_frags--; | ||
157 | } else { | ||
158 | skb_shinfo(skb)->frags[0].page_offset += | ||
159 | sizeof(*mhdr) + copy; | ||
160 | skb_shinfo(skb)->frags[0].size = len; | ||
161 | skb->data_len += len; | ||
162 | skb->len += len; | ||
163 | } | ||
164 | |||
165 | while (--mhdr->num_buffers) { | ||
166 | struct sk_buff *nskb; | ||
167 | |||
168 | i = skb_shinfo(skb)->nr_frags; | ||
169 | if (i >= MAX_SKB_FRAGS) { | ||
170 | pr_debug("%s: packet too long %d\n", dev->name, | ||
171 | len); | ||
172 | dev->stats.rx_length_errors++; | ||
173 | goto drop; | ||
174 | } | ||
175 | |||
176 | nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); | ||
177 | if (!nskb) { | ||
178 | pr_debug("%s: rx error: %d buffers missing\n", | ||
179 | dev->name, mhdr->num_buffers); | ||
180 | dev->stats.rx_length_errors++; | ||
181 | goto drop; | ||
182 | } | ||
183 | |||
184 | __skb_unlink(nskb, &vi->recv); | ||
185 | vi->num--; | ||
186 | |||
187 | skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0]; | ||
188 | skb_shinfo(nskb)->nr_frags = 0; | ||
189 | kfree_skb(nskb); | ||
190 | |||
191 | if (len > PAGE_SIZE) | ||
192 | len = PAGE_SIZE; | ||
193 | |||
194 | skb_shinfo(skb)->frags[i].size = len; | ||
195 | skb_shinfo(skb)->nr_frags++; | ||
196 | skb->data_len += len; | ||
197 | skb->len += len; | ||
198 | } | ||
199 | } else { | ||
200 | len -= sizeof(struct virtio_net_hdr); | ||
201 | |||
202 | if (len <= MAX_PACKET_LEN) | ||
203 | trim_pages(vi, skb); | ||
204 | |||
205 | err = pskb_trim(skb, len); | ||
206 | if (err) { | ||
207 | pr_debug("%s: pskb_trim failed %i %d\n", dev->name, | ||
208 | len, err); | ||
209 | dev->stats.rx_dropped++; | ||
210 | goto drop; | ||
211 | } | ||
142 | } | 212 | } |
213 | |||
143 | skb->truesize += skb->data_len; | 214 | skb->truesize += skb->data_len; |
144 | dev->stats.rx_bytes += skb->len; | 215 | dev->stats.rx_bytes += skb->len; |
145 | dev->stats.rx_packets++; | 216 | dev->stats.rx_packets++; |
@@ -198,7 +269,7 @@ drop: | |||
198 | dev_kfree_skb(skb); | 269 | dev_kfree_skb(skb); |
199 | } | 270 | } |
200 | 271 | ||
201 | static void try_fill_recv(struct virtnet_info *vi) | 272 | static void try_fill_recv_maxbufs(struct virtnet_info *vi) |
202 | { | 273 | { |
203 | struct sk_buff *skb; | 274 | struct sk_buff *skb; |
204 | struct scatterlist sg[2+MAX_SKB_FRAGS]; | 275 | struct scatterlist sg[2+MAX_SKB_FRAGS]; |
@@ -206,12 +277,16 @@ static void try_fill_recv(struct virtnet_info *vi) | |||
206 | 277 | ||
207 | sg_init_table(sg, 2+MAX_SKB_FRAGS); | 278 | sg_init_table(sg, 2+MAX_SKB_FRAGS); |
208 | for (;;) { | 279 | for (;;) { |
280 | struct virtio_net_hdr *hdr; | ||
281 | |||
209 | skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN); | 282 | skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN); |
210 | if (unlikely(!skb)) | 283 | if (unlikely(!skb)) |
211 | break; | 284 | break; |
212 | 285 | ||
213 | skb_put(skb, MAX_PACKET_LEN); | 286 | skb_put(skb, MAX_PACKET_LEN); |
214 | vnet_hdr_to_sg(sg, skb); | 287 | |
288 | hdr = skb_vnet_hdr(skb); | ||
289 | sg_init_one(sg, hdr, sizeof(*hdr)); | ||
215 | 290 | ||
216 | if (vi->big_packets) { | 291 | if (vi->big_packets) { |
217 | for (i = 0; i < MAX_SKB_FRAGS; i++) { | 292 | for (i = 0; i < MAX_SKB_FRAGS; i++) { |
@@ -247,6 +322,54 @@ static void try_fill_recv(struct virtnet_info *vi) | |||
247 | vi->rvq->vq_ops->kick(vi->rvq); | 322 | vi->rvq->vq_ops->kick(vi->rvq); |
248 | } | 323 | } |
249 | 324 | ||
325 | static void try_fill_recv(struct virtnet_info *vi) | ||
326 | { | ||
327 | struct sk_buff *skb; | ||
328 | struct scatterlist sg[1]; | ||
329 | int err; | ||
330 | |||
331 | if (!vi->mergeable_rx_bufs) { | ||
332 | try_fill_recv_maxbufs(vi); | ||
333 | return; | ||
334 | } | ||
335 | |||
336 | for (;;) { | ||
337 | skb_frag_t *f; | ||
338 | |||
339 | skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); | ||
340 | if (unlikely(!skb)) | ||
341 | break; | ||
342 | |||
343 | skb_reserve(skb, NET_IP_ALIGN); | ||
344 | |||
345 | f = &skb_shinfo(skb)->frags[0]; | ||
346 | f->page = get_a_page(vi, GFP_ATOMIC); | ||
347 | if (!f->page) { | ||
348 | kfree_skb(skb); | ||
349 | break; | ||
350 | } | ||
351 | |||
352 | f->page_offset = 0; | ||
353 | f->size = PAGE_SIZE; | ||
354 | |||
355 | skb_shinfo(skb)->nr_frags++; | ||
356 | |||
357 | sg_init_one(sg, page_address(f->page), PAGE_SIZE); | ||
358 | skb_queue_head(&vi->recv, skb); | ||
359 | |||
360 | err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb); | ||
361 | if (err) { | ||
362 | skb_unlink(skb, &vi->recv); | ||
363 | kfree_skb(skb); | ||
364 | break; | ||
365 | } | ||
366 | vi->num++; | ||
367 | } | ||
368 | if (unlikely(vi->num > vi->max)) | ||
369 | vi->max = vi->num; | ||
370 | vi->rvq->vq_ops->kick(vi->rvq); | ||
371 | } | ||
372 | |||
250 | static void skb_recv_done(struct virtqueue *rvq) | 373 | static void skb_recv_done(struct virtqueue *rvq) |
251 | { | 374 | { |
252 | struct virtnet_info *vi = rvq->vdev->priv; | 375 | struct virtnet_info *vi = rvq->vdev->priv; |
@@ -325,15 +448,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) | |||
325 | { | 448 | { |
326 | int num, err; | 449 | int num, err; |
327 | struct scatterlist sg[2+MAX_SKB_FRAGS]; | 450 | struct scatterlist sg[2+MAX_SKB_FRAGS]; |
328 | struct virtio_net_hdr *hdr; | 451 | struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); |
452 | struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); | ||
329 | const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; | 453 | const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; |
330 | 454 | ||
331 | sg_init_table(sg, 2+MAX_SKB_FRAGS); | 455 | sg_init_table(sg, 2+MAX_SKB_FRAGS); |
332 | 456 | ||
333 | pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); | 457 | pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); |
334 | 458 | ||
335 | /* Encode metadata header at front. */ | ||
336 | hdr = skb_vnet_hdr(skb); | ||
337 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 459 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
338 | hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | 460 | hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; |
339 | hdr->csum_start = skb->csum_start - skb_headroom(skb); | 461 | hdr->csum_start = skb->csum_start - skb_headroom(skb); |
@@ -361,7 +483,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) | |||
361 | hdr->gso_size = hdr->hdr_len = 0; | 483 | hdr->gso_size = hdr->hdr_len = 0; |
362 | } | 484 | } |
363 | 485 | ||
364 | vnet_hdr_to_sg(sg, skb); | 486 | mhdr->num_buffers = 0; |
487 | |||
488 | /* Encode metadata header at front. */ | ||
489 | if (vi->mergeable_rx_bufs) | ||
490 | sg_init_one(sg, mhdr, sizeof(*mhdr)); | ||
491 | else | ||
492 | sg_init_one(sg, hdr, sizeof(*hdr)); | ||
493 | |||
365 | num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; | 494 | num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; |
366 | 495 | ||
367 | err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); | 496 | err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); |
@@ -551,6 +680,9 @@ static int virtnet_probe(struct virtio_device *vdev) | |||
551 | || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) | 680 | || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) |
552 | vi->big_packets = true; | 681 | vi->big_packets = true; |
553 | 682 | ||
683 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) | ||
684 | vi->mergeable_rx_bufs = true; | ||
685 | |||
554 | /* We expect two virtqueues, receive then send. */ | 686 | /* We expect two virtqueues, receive then send. */ |
555 | vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); | 687 | vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); |
556 | if (IS_ERR(vi->rvq)) { | 688 | if (IS_ERR(vi->rvq)) { |
@@ -643,6 +775,7 @@ static unsigned int features[] = { | |||
643 | VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, | 775 | VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, |
644 | VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, | 776 | VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, |
645 | VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ | 777 | VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ |
778 | VIRTIO_NET_F_MRG_RXBUF, | ||
646 | VIRTIO_F_NOTIFY_ON_EMPTY, | 779 | VIRTIO_F_NOTIFY_ON_EMPTY, |
647 | }; | 780 | }; |
648 | 781 | ||
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5e33761b9b8a..5cdd0aa8bde9 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ | 20 | #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ |
21 | #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ | 21 | #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ |
22 | #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ | 22 | #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ |
23 | #define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ | ||
23 | 24 | ||
24 | struct virtio_net_config | 25 | struct virtio_net_config |
25 | { | 26 | { |
@@ -44,4 +45,12 @@ struct virtio_net_hdr | |||
44 | __u16 csum_start; /* Position to start checksumming from */ | 45 | __u16 csum_start; /* Position to start checksumming from */ |
45 | __u16 csum_offset; /* Offset after that to place checksum */ | 46 | __u16 csum_offset; /* Offset after that to place checksum */ |
46 | }; | 47 | }; |
48 | |||
49 | /* This is the version of the header to use when the MRG_RXBUF | ||
50 | * feature has been negotiated. */ | ||
51 | struct virtio_net_hdr_mrg_rxbuf { | ||
52 | struct virtio_net_hdr hdr; | ||
53 | __u16 num_buffers; /* Number of merged rx buffers */ | ||
54 | }; | ||
55 | |||
47 | #endif /* _LINUX_VIRTIO_NET_H */ | 56 | #endif /* _LINUX_VIRTIO_NET_H */ |