diff options
author | Arnd Bergmann <arnd@arndb.de> | 2010-02-18 00:48:17 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-02-18 17:08:38 -0500 |
commit | b9fb9ee07e67fce0b7bfd517a48710465706c30a (patch) | |
tree | 10dc1f67a21f2f065f603adf2fc7f86fd009a758 | |
parent | 501c774cb13c3ef8fb7fc5f08fa19473f7d9a0db (diff) |
macvtap: add GSO/csum offload support
Add a flags field to macvtap_queue so that processing of virtio_net_hdr can be
enabled or disabled via IFF_VNET_HDR. When the flag is set, a virtio_net_hdr is
prepended in the receive path and parsed in the send path; when it is clear, no
header is prepended or expected.
Original patch by Sridhar, further changes by Arnd.
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/macvtap.c | 206 |
1 file changed, 182 insertions, 24 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index e354501ab297..55ceae09738e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <net/net_namespace.h> | 17 | #include <net/net_namespace.h> |
18 | #include <net/rtnetlink.h> | 18 | #include <net/rtnetlink.h> |
19 | #include <net/sock.h> | 19 | #include <net/sock.h> |
20 | #include <linux/virtio_net.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * A macvtap queue is the central object of this driver, it connects | 23 | * A macvtap queue is the central object of this driver, it connects |
@@ -37,6 +38,7 @@ struct macvtap_queue { | |||
37 | struct socket sock; | 38 | struct socket sock; |
38 | struct macvlan_dev *vlan; | 39 | struct macvlan_dev *vlan; |
39 | struct file *file; | 40 | struct file *file; |
41 | unsigned int flags; | ||
40 | }; | 42 | }; |
41 | 43 | ||
42 | static struct proto macvtap_proto = { | 44 | static struct proto macvtap_proto = { |
@@ -276,6 +278,7 @@ static int macvtap_open(struct inode *inode, struct file *file) | |||
276 | q->sock.ops = &macvtap_socket_ops; | 278 | q->sock.ops = &macvtap_socket_ops; |
277 | sock_init_data(&q->sock, &q->sk); | 279 | sock_init_data(&q->sock, &q->sk); |
278 | q->sk.sk_write_space = macvtap_sock_write_space; | 280 | q->sk.sk_write_space = macvtap_sock_write_space; |
281 | q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; | ||
279 | 282 | ||
280 | err = macvtap_set_queue(dev, file, q); | 283 | err = macvtap_set_queue(dev, file, q); |
281 | if (err) | 284 | if (err) |
@@ -318,6 +321,111 @@ out: | |||
318 | return mask; | 321 | return mask; |
319 | } | 322 | } |
320 | 323 | ||
324 | static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, | ||
325 | size_t len, size_t linear, | ||
326 | int noblock, int *err) | ||
327 | { | ||
328 | struct sk_buff *skb; | ||
329 | |||
330 | /* Under a page? Don't bother with paged skb. */ | ||
331 | if (prepad + len < PAGE_SIZE || !linear) | ||
332 | linear = len; | ||
333 | |||
334 | skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, | ||
335 | err); | ||
336 | if (!skb) | ||
337 | return NULL; | ||
338 | |||
339 | skb_reserve(skb, prepad); | ||
340 | skb_put(skb, linear); | ||
341 | skb->data_len = len - linear; | ||
342 | skb->len += len - linear; | ||
343 | |||
344 | return skb; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should | ||
349 | * be shared with the tun/tap driver. | ||
350 | */ | ||
351 | static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, | ||
352 | struct virtio_net_hdr *vnet_hdr) | ||
353 | { | ||
354 | unsigned short gso_type = 0; | ||
355 | if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { | ||
356 | switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { | ||
357 | case VIRTIO_NET_HDR_GSO_TCPV4: | ||
358 | gso_type = SKB_GSO_TCPV4; | ||
359 | break; | ||
360 | case VIRTIO_NET_HDR_GSO_TCPV6: | ||
361 | gso_type = SKB_GSO_TCPV6; | ||
362 | break; | ||
363 | case VIRTIO_NET_HDR_GSO_UDP: | ||
364 | gso_type = SKB_GSO_UDP; | ||
365 | break; | ||
366 | default: | ||
367 | return -EINVAL; | ||
368 | } | ||
369 | |||
370 | if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) | ||
371 | gso_type |= SKB_GSO_TCP_ECN; | ||
372 | |||
373 | if (vnet_hdr->gso_size == 0) | ||
374 | return -EINVAL; | ||
375 | } | ||
376 | |||
377 | if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { | ||
378 | if (!skb_partial_csum_set(skb, vnet_hdr->csum_start, | ||
379 | vnet_hdr->csum_offset)) | ||
380 | return -EINVAL; | ||
381 | } | ||
382 | |||
383 | if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { | ||
384 | skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; | ||
385 | skb_shinfo(skb)->gso_type = gso_type; | ||
386 | |||
387 | /* Header must be checked, and gso_segs computed. */ | ||
388 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
389 | skb_shinfo(skb)->gso_segs = 0; | ||
390 | } | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, | ||
395 | struct virtio_net_hdr *vnet_hdr) | ||
396 | { | ||
397 | memset(vnet_hdr, 0, sizeof(*vnet_hdr)); | ||
398 | |||
399 | if (skb_is_gso(skb)) { | ||
400 | struct skb_shared_info *sinfo = skb_shinfo(skb); | ||
401 | |||
402 | /* This is a hint as to how much should be linear. */ | ||
403 | vnet_hdr->hdr_len = skb_headlen(skb); | ||
404 | vnet_hdr->gso_size = sinfo->gso_size; | ||
405 | if (sinfo->gso_type & SKB_GSO_TCPV4) | ||
406 | vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; | ||
407 | else if (sinfo->gso_type & SKB_GSO_TCPV6) | ||
408 | vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; | ||
409 | else if (sinfo->gso_type & SKB_GSO_UDP) | ||
410 | vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; | ||
411 | else | ||
412 | BUG(); | ||
413 | if (sinfo->gso_type & SKB_GSO_TCP_ECN) | ||
414 | vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; | ||
415 | } else | ||
416 | vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; | ||
417 | |||
418 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
419 | vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | ||
420 | vnet_hdr->csum_start = skb->csum_start - | ||
421 | skb_headroom(skb); | ||
422 | vnet_hdr->csum_offset = skb->csum_offset; | ||
423 | } /* else everything is zero */ | ||
424 | |||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | |||
321 | /* Get packet from user space buffer */ | 429 | /* Get packet from user space buffer */ |
322 | static ssize_t macvtap_get_user(struct macvtap_queue *q, | 430 | static ssize_t macvtap_get_user(struct macvtap_queue *q, |
323 | const struct iovec *iv, size_t count, | 431 | const struct iovec *iv, size_t count, |
@@ -327,22 +435,53 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, | |||
327 | struct macvlan_dev *vlan; | 435 | struct macvlan_dev *vlan; |
328 | size_t len = count; | 436 | size_t len = count; |
329 | int err; | 437 | int err; |
438 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
439 | int vnet_hdr_len = 0; | ||
440 | |||
441 | if (q->flags & IFF_VNET_HDR) { | ||
442 | vnet_hdr_len = sizeof(vnet_hdr); | ||
443 | |||
444 | err = -EINVAL; | ||
445 | if ((len -= vnet_hdr_len) < 0) | ||
446 | goto err; | ||
447 | |||
448 | err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, | ||
449 | vnet_hdr_len); | ||
450 | if (err < 0) | ||
451 | goto err; | ||
452 | if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && | ||
453 | vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > | ||
454 | vnet_hdr.hdr_len) | ||
455 | vnet_hdr.hdr_len = vnet_hdr.csum_start + | ||
456 | vnet_hdr.csum_offset + 2; | ||
457 | err = -EINVAL; | ||
458 | if (vnet_hdr.hdr_len > len) | ||
459 | goto err; | ||
460 | } | ||
330 | 461 | ||
462 | err = -EINVAL; | ||
331 | if (unlikely(len < ETH_HLEN)) | 463 | if (unlikely(len < ETH_HLEN)) |
332 | return -EINVAL; | 464 | goto err; |
333 | 465 | ||
334 | skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err); | 466 | skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len, |
467 | noblock, &err); | ||
335 | if (!skb) | 468 | if (!skb) |
336 | goto err; | 469 | goto err; |
337 | 470 | ||
338 | skb_reserve(skb, NET_IP_ALIGN); | 471 | err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); |
339 | skb_put(skb, count); | ||
340 | |||
341 | err = skb_copy_datagram_from_iovec(skb, 0, iv, 0, len); | ||
342 | if (err) | 472 | if (err) |
343 | goto err; | 473 | goto err_kfree; |
344 | 474 | ||
345 | skb_set_network_header(skb, ETH_HLEN); | 475 | skb_set_network_header(skb, ETH_HLEN); |
476 | skb_reset_mac_header(skb); | ||
477 | skb->protocol = eth_hdr(skb)->h_proto; | ||
478 | |||
479 | if (vnet_hdr_len) { | ||
480 | err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr); | ||
481 | if (err) | ||
482 | goto err_kfree; | ||
483 | } | ||
484 | |||
346 | rcu_read_lock_bh(); | 485 | rcu_read_lock_bh(); |
347 | vlan = rcu_dereference(q->vlan); | 486 | vlan = rcu_dereference(q->vlan); |
348 | if (vlan) | 487 | if (vlan) |
@@ -353,15 +492,16 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, | |||
353 | 492 | ||
354 | return count; | 493 | return count; |
355 | 494 | ||
495 | err_kfree: | ||
496 | kfree_skb(skb); | ||
497 | |||
356 | err: | 498 | err: |
357 | rcu_read_lock_bh(); | 499 | rcu_read_lock_bh(); |
358 | vlan = rcu_dereference(q->vlan); | 500 | vlan = rcu_dereference(q->vlan); |
359 | if (vlan) | 501 | if (vlan) |
360 | macvlan_count_rx(q->vlan, 0, false, false); | 502 | netdev_get_tx_queue(vlan->dev, 0)->tx_dropped++; |
361 | rcu_read_unlock_bh(); | 503 | rcu_read_unlock_bh(); |
362 | 504 | ||
363 | kfree_skb(skb); | ||
364 | |||
365 | return err; | 505 | return err; |
366 | } | 506 | } |
367 | 507 | ||
@@ -384,10 +524,25 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, | |||
384 | { | 524 | { |
385 | struct macvlan_dev *vlan; | 525 | struct macvlan_dev *vlan; |
386 | int ret; | 526 | int ret; |
527 | int vnet_hdr_len = 0; | ||
528 | |||
529 | if (q->flags & IFF_VNET_HDR) { | ||
530 | struct virtio_net_hdr vnet_hdr; | ||
531 | vnet_hdr_len = sizeof (vnet_hdr); | ||
532 | if ((len -= vnet_hdr_len) < 0) | ||
533 | return -EINVAL; | ||
534 | |||
535 | ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr); | ||
536 | if (ret) | ||
537 | return ret; | ||
538 | |||
539 | if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, vnet_hdr_len)) | ||
540 | return -EFAULT; | ||
541 | } | ||
387 | 542 | ||
388 | len = min_t(int, skb->len, len); | 543 | len = min_t(int, skb->len, len); |
389 | 544 | ||
390 | ret = skb_copy_datagram_const_iovec(skb, 0, iv, 0, len); | 545 | ret = skb_copy_datagram_const_iovec(skb, 0, iv, vnet_hdr_len, len); |
391 | 546 | ||
392 | rcu_read_lock_bh(); | 547 | rcu_read_lock_bh(); |
393 | vlan = rcu_dereference(q->vlan); | 548 | vlan = rcu_dereference(q->vlan); |
@@ -395,7 +550,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, | |||
395 | macvlan_count_rx(vlan, len, ret == 0, 0); | 550 | macvlan_count_rx(vlan, len, ret == 0, 0); |
396 | rcu_read_unlock_bh(); | 551 | rcu_read_unlock_bh(); |
397 | 552 | ||
398 | return ret ? ret : len; | 553 | return ret ? ret : (len + vnet_hdr_len); |
399 | } | 554 | } |
400 | 555 | ||
401 | static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, | 556 | static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, |
@@ -473,9 +628,14 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, | |||
473 | /* ignore the name, just look at flags */ | 628 | /* ignore the name, just look at flags */ |
474 | if (get_user(u, &ifr->ifr_flags)) | 629 | if (get_user(u, &ifr->ifr_flags)) |
475 | return -EFAULT; | 630 | return -EFAULT; |
476 | if (u != (IFF_TAP | IFF_NO_PI)) | 631 | |
477 | return -EINVAL; | 632 | ret = 0; |
478 | return 0; | 633 | if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) |
634 | ret = -EINVAL; | ||
635 | else | ||
636 | q->flags = u; | ||
637 | |||
638 | return ret; | ||
479 | 639 | ||
480 | case TUNGETIFF: | 640 | case TUNGETIFF: |
481 | rcu_read_lock_bh(); | 641 | rcu_read_lock_bh(); |
@@ -489,13 +649,13 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, | |||
489 | 649 | ||
490 | ret = 0; | 650 | ret = 0; |
491 | if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || | 651 | if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || |
492 | put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags)) | 652 | put_user(q->flags, &ifr->ifr_flags)) |
493 | ret = -EFAULT; | 653 | ret = -EFAULT; |
494 | dev_put(vlan->dev); | 654 | dev_put(vlan->dev); |
495 | return ret; | 655 | return ret; |
496 | 656 | ||
497 | case TUNGETFEATURES: | 657 | case TUNGETFEATURES: |
498 | if (put_user((IFF_TAP | IFF_NO_PI), up)) | 658 | if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up)) |
499 | return -EFAULT; | 659 | return -EFAULT; |
500 | return 0; | 660 | return 0; |
501 | 661 | ||
@@ -509,15 +669,13 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, | |||
509 | case TUNSETOFFLOAD: | 669 | case TUNSETOFFLOAD: |
510 | /* let the user check for future flags */ | 670 | /* let the user check for future flags */ |
511 | if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | | 671 | if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | |
512 | TUN_F_TSO_ECN | TUN_F_UFO)) | 672 | TUN_F_TSO_ECN | TUN_F_UFO)) |
513 | return -EINVAL; | ||
514 | |||
515 | /* TODO: add support for these, so far we don't | ||
516 | support any offload */ | ||
517 | if (arg & (TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | | ||
518 | TUN_F_TSO_ECN | TUN_F_UFO)) | ||
519 | return -EINVAL; | 673 | return -EINVAL; |
520 | 674 | ||
675 | /* TODO: only accept frames with the features that | ||
676 | got enabled for forwarded frames */ | ||
677 | if (!(q->flags & IFF_VNET_HDR)) | ||
678 | return -EINVAL; | ||
521 | return 0; | 679 | return 0; |
522 | 680 | ||
523 | default: | 681 | default: |