diff options
author | Ananda Raju <ananda.raju@neterion.com> | 2005-10-18 18:46:41 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@mandriva.com> | 2005-10-28 14:30:00 -0400 |
commit | e89e9cf539a28df7d0eb1d0a545368e9920b34ac (patch) | |
tree | aae6a825f351ce931fcd30f1a865ebe65227c4b8 /net/ipv4 | |
parent | de5144164f6242ccfa8c9b64eec570564f5eaf14 (diff) |
[IPv4/IPv6]: UFO Scatter-gather approach
Attached is kernel patch for UDP Fragmentation Offload (UFO) feature.
1. This patch incorporates the review comments by Jeff Garzik.
2. Renamed USO as UFO (UDP Fragmentation Offload)
3. udp sendfile support with UFO
This patch uses the scatter-gather feature of skb to generate a large UDP
datagram. Below is a "how-to" on the changes required in a network device
driver to use the UFO interface.
UDP Fragmentation Offload (UFO) Interface:
-------------------------------------------
UFO is a feature wherein the Linux kernel network stack offloads the
IP fragmentation of a large UDP datagram to hardware. This
reduces the stack's overhead in fragmenting the large UDP datagram into
MTU-sized packets.
1) Drivers indicate their capability of UFO using
dev->features |= NETIF_F_UFO | NETIF_F_HW_CSUM | NETIF_F_SG
NETIF_F_HW_CSUM is required for UFO over ipv6.
2) A UFO packet will be submitted for transmission using the driver's xmit routine.
A UFO packet will have a non-zero value for
"skb_shinfo(skb)->ufo_size"
skb_shinfo(skb)->ufo_size will indicate the length of data part in each IP
fragment going out of the adapter after IP fragmentation by hardware.
skb->data will contain MAC/IP/UDP header and skb_shinfo(skb)->frags[]
contains the data payload. The skb->ip_summed will be set to CHECKSUM_HW,
indicating that the hardware has to do the checksum calculation. The hardware should
compute the UDP checksum of the complete datagram and also the IP header checksum of
each fragmented IP packet.
For IPv6, UFO provides the fragment identification value in
skb_shinfo(skb)->ip6_frag_id. The adapter should use this ID when generating
IPv6 fragments.
Signed-off-by: Ananda Raju <ananda.raju@neterion.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (forwarded)
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/ip_output.c | 83 |
1 files changed, 78 insertions, 5 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 87e350069abb..17758234a3e3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb) | |||
275 | { | 275 | { |
276 | IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); | 276 | IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); |
277 | 277 | ||
278 | if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size) | 278 | if (skb->len > dst_mtu(skb->dst) && |
279 | !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) | ||
279 | return ip_fragment(skb, ip_finish_output); | 280 | return ip_fragment(skb, ip_finish_output); |
280 | else | 281 | else |
281 | return ip_finish_output(skb); | 282 | return ip_finish_output(skb); |
@@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy) | |||
688 | return csum; | 689 | return csum; |
689 | } | 690 | } |
690 | 691 | ||
692 | inline int ip_ufo_append_data(struct sock *sk, | ||
693 | int getfrag(void *from, char *to, int offset, int len, | ||
694 | int odd, struct sk_buff *skb), | ||
695 | void *from, int length, int hh_len, int fragheaderlen, | ||
696 | int transhdrlen, int mtu,unsigned int flags) | ||
697 | { | ||
698 | struct sk_buff *skb; | ||
699 | int err; | ||
700 | |||
701 | /* There is support for UDP fragmentation offload by network | ||
702 | * device, so create one single skb packet containing complete | ||
703 | * udp datagram | ||
704 | */ | ||
705 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { | ||
706 | skb = sock_alloc_send_skb(sk, | ||
707 | hh_len + fragheaderlen + transhdrlen + 20, | ||
708 | (flags & MSG_DONTWAIT), &err); | ||
709 | |||
710 | if (skb == NULL) | ||
711 | return err; | ||
712 | |||
713 | /* reserve space for Hardware header */ | ||
714 | skb_reserve(skb, hh_len); | ||
715 | |||
716 | /* create space for UDP/IP header */ | ||
717 | skb_put(skb,fragheaderlen + transhdrlen); | ||
718 | |||
719 | /* initialize network header pointer */ | ||
720 | skb->nh.raw = skb->data; | ||
721 | |||
722 | /* initialize protocol header pointer */ | ||
723 | skb->h.raw = skb->data + fragheaderlen; | ||
724 | |||
725 | skb->ip_summed = CHECKSUM_HW; | ||
726 | skb->csum = 0; | ||
727 | sk->sk_sndmsg_off = 0; | ||
728 | } | ||
729 | |||
730 | err = skb_append_datato_frags(sk,skb, getfrag, from, | ||
731 | (length - transhdrlen)); | ||
732 | if (!err) { | ||
733 | /* specify the length of each IP datagram fragment*/ | ||
734 | skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); | ||
735 | __skb_queue_tail(&sk->sk_write_queue, skb); | ||
736 | |||
737 | return 0; | ||
738 | } | ||
739 | /* There is not enough support do UFO , | ||
740 | * so follow normal path | ||
741 | */ | ||
742 | kfree_skb(skb); | ||
743 | return err; | ||
744 | } | ||
745 | |||
691 | /* | 746 | /* |
692 | * ip_append_data() and ip_append_page() can make one large IP datagram | 747 | * ip_append_data() and ip_append_page() can make one large IP datagram |
693 | * from many pieces of data. Each pieces will be holded on the socket | 748 | * from many pieces of data. Each pieces will be holded on the socket |
@@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk, | |||
777 | csummode = CHECKSUM_HW; | 832 | csummode = CHECKSUM_HW; |
778 | 833 | ||
779 | inet->cork.length += length; | 834 | inet->cork.length += length; |
835 | if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && | ||
836 | (rt->u.dst.dev->features & NETIF_F_UFO)) { | ||
837 | |||
838 | if(ip_ufo_append_data(sk, getfrag, from, length, hh_len, | ||
839 | fragheaderlen, transhdrlen, mtu, flags)) | ||
840 | goto error; | ||
841 | |||
842 | return 0; | ||
843 | } | ||
780 | 844 | ||
781 | /* So, what's going on in the loop below? | 845 | /* So, what's going on in the loop below? |
782 | * | 846 | * |
@@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1008 | return -EINVAL; | 1072 | return -EINVAL; |
1009 | 1073 | ||
1010 | inet->cork.length += size; | 1074 | inet->cork.length += size; |
1075 | if ((sk->sk_protocol == IPPROTO_UDP) && | ||
1076 | (rt->u.dst.dev->features & NETIF_F_UFO)) | ||
1077 | skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); | ||
1078 | |||
1011 | 1079 | ||
1012 | while (size > 0) { | 1080 | while (size > 0) { |
1013 | int i; | 1081 | int i; |
1014 | 1082 | ||
1015 | /* Check if the remaining data fits into current packet. */ | 1083 | if (skb_shinfo(skb)->ufo_size) |
1016 | len = mtu - skb->len; | 1084 | len = size; |
1017 | if (len < size) | 1085 | else { |
1018 | len = maxfraglen - skb->len; | 1086 | |
1087 | /* Check if the remaining data fits into current packet. */ | ||
1088 | len = mtu - skb->len; | ||
1089 | if (len < size) | ||
1090 | len = maxfraglen - skb->len; | ||
1091 | } | ||
1019 | if (len <= 0) { | 1092 | if (len <= 0) { |
1020 | struct sk_buff *skb_prev; | 1093 | struct sk_buff *skb_prev; |
1021 | char *data; | 1094 | char *data; |