aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
authorAnanda Raju <ananda.raju@neterion.com>2005-10-18 18:46:41 -0400
committerArnaldo Carvalho de Melo <acme@mandriva.com>2005-10-28 14:30:00 -0400
commite89e9cf539a28df7d0eb1d0a545368e9920b34ac (patch)
treeaae6a825f351ce931fcd30f1a865ebe65227c4b8 /net/ipv4/ip_output.c
parentde5144164f6242ccfa8c9b64eec570564f5eaf14 (diff)
[IPv4/IPv6]: UFO Scatter-gather approach
Attached is a kernel patch for the UDP Fragmentation Offload (UFO) feature. 1. This patch incorporates the review comments by Jeff Garzik. 2. Renamed USO as UFO (UDP Fragmentation Offload). 3. UDP sendfile support with UFO. This patch uses the scatter-gather feature of skb to generate a large UDP datagram. Below is a "how-to" on the changes required in a network device driver to use the UFO interface. UDP Fragmentation Offload (UFO) Interface: ------------------------------------------- UFO is a feature wherein the Linux kernel network stack will offload the IP fragmentation functionality of large UDP datagrams to hardware. This will reduce the overhead of the stack in fragmenting large UDP datagrams into MTU-sized packets. 1) Drivers indicate their capability of UFO using dev->features |= NETIF_F_UFO | NETIF_F_HW_CSUM | NETIF_F_SG. NETIF_F_HW_CSUM is required for UFO over IPv6. 2) A UFO packet will be submitted for transmission using the driver xmit routine. A UFO packet will have a non-zero value for "skb_shinfo(skb)->ufo_size". skb_shinfo(skb)->ufo_size will indicate the length of the data part in each IP fragment going out of the adapter after IP fragmentation by hardware. skb->data will contain the MAC/IP/UDP header and skb_shinfo(skb)->frags[] contains the data payload. The skb->ip_summed will be set to CHECKSUM_HW, indicating that hardware has to do the checksum calculation. Hardware should compute the UDP checksum of the complete datagram and also the IP header checksum of each fragmented IP packet. For IPv6, UFO provides the fragment identification ID in skb_shinfo(skb)->ip6_frag_id. The adapter should use this ID for generating IPv6 fragments. Signed-off-by: Ananda Raju <ananda.raju@neterion.com> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (forwarded) Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c83
1 files changed, 78 insertions, 5 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 87e350069abb..17758234a3e3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
275{ 275{
276 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 276 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
277 277
278 if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size) 278 if (skb->len > dst_mtu(skb->dst) &&
279 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
279 return ip_fragment(skb, ip_finish_output); 280 return ip_fragment(skb, ip_finish_output);
280 else 281 else
281 return ip_finish_output(skb); 282 return ip_finish_output(skb);
@@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy)
688 return csum; 689 return csum;
689} 690}
690 691
692inline int ip_ufo_append_data(struct sock *sk,
693 int getfrag(void *from, char *to, int offset, int len,
694 int odd, struct sk_buff *skb),
695 void *from, int length, int hh_len, int fragheaderlen,
696 int transhdrlen, int mtu,unsigned int flags)
697{
698 struct sk_buff *skb;
699 int err;
700
701 /* There is support for UDP fragmentation offload by network
702 * device, so create one single skb packet containing complete
703 * udp datagram
704 */
705 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
706 skb = sock_alloc_send_skb(sk,
707 hh_len + fragheaderlen + transhdrlen + 20,
708 (flags & MSG_DONTWAIT), &err);
709
710 if (skb == NULL)
711 return err;
712
713 /* reserve space for Hardware header */
714 skb_reserve(skb, hh_len);
715
716 /* create space for UDP/IP header */
717 skb_put(skb,fragheaderlen + transhdrlen);
718
719 /* initialize network header pointer */
720 skb->nh.raw = skb->data;
721
722 /* initialize protocol header pointer */
723 skb->h.raw = skb->data + fragheaderlen;
724
725 skb->ip_summed = CHECKSUM_HW;
726 skb->csum = 0;
727 sk->sk_sndmsg_off = 0;
728 }
729
730 err = skb_append_datato_frags(sk,skb, getfrag, from,
731 (length - transhdrlen));
732 if (!err) {
733 /* specify the length of each IP datagram fragment*/
734 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
735 __skb_queue_tail(&sk->sk_write_queue, skb);
736
737 return 0;
738 }
739 /* There is not enough support do UFO ,
740 * so follow normal path
741 */
742 kfree_skb(skb);
743 return err;
744}
745
691/* 746/*
692 * ip_append_data() and ip_append_page() can make one large IP datagram 747 * ip_append_data() and ip_append_page() can make one large IP datagram
693 * from many pieces of data. Each pieces will be holded on the socket 748 * from many pieces of data. Each pieces will be holded on the socket
@@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk,
777 csummode = CHECKSUM_HW; 832 csummode = CHECKSUM_HW;
778 833
779 inet->cork.length += length; 834 inet->cork.length += length;
835 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
836 (rt->u.dst.dev->features & NETIF_F_UFO)) {
837
838 if(ip_ufo_append_data(sk, getfrag, from, length, hh_len,
839 fragheaderlen, transhdrlen, mtu, flags))
840 goto error;
841
842 return 0;
843 }
780 844
781 /* So, what's going on in the loop below? 845 /* So, what's going on in the loop below?
782 * 846 *
@@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1008 return -EINVAL; 1072 return -EINVAL;
1009 1073
1010 inet->cork.length += size; 1074 inet->cork.length += size;
1075 if ((sk->sk_protocol == IPPROTO_UDP) &&
1076 (rt->u.dst.dev->features & NETIF_F_UFO))
1077 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
1078
1011 1079
1012 while (size > 0) { 1080 while (size > 0) {
1013 int i; 1081 int i;
1014 1082
1015 /* Check if the remaining data fits into current packet. */ 1083 if (skb_shinfo(skb)->ufo_size)
1016 len = mtu - skb->len; 1084 len = size;
1017 if (len < size) 1085 else {
1018 len = maxfraglen - skb->len; 1086
1087 /* Check if the remaining data fits into current packet. */
1088 len = mtu - skb->len;
1089 if (len < size)
1090 len = maxfraglen - skb->len;
1091 }
1019 if (len <= 0) { 1092 if (len <= 0) {
1020 struct sk_buff *skb_prev; 1093 struct sk_buff *skb_prev;
1021 char *data; 1094 char *data;