diff options
author | Pravin B Shelar <pshelar@nicira.com> | 2013-02-14 09:02:41 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-02-15 15:17:11 -0500 |
commit | 68c331631143f5f039baac99a650e0b9e1ea02b6 (patch) | |
tree | c69d73c5599aab5e92a8c99bc5343c9fc9ffbbd8 /net/ipv4 | |
parent | 05e8ef4ab2d8087d360e814d14da20b9f7fb2283 (diff) |
v4 GRE: Add TCP segmentation offload for GRE
Following patch adds GRE protocol offload handler so that
skb_gso_segment() can segment GRE packets.
SKB GSO CB is added to keep track of total header length so that
skb_segment can push the entire header. E.g. in the case of GRE, skb_segment
needs to push both the inner and outer headers to every segment.
New NETIF_F_GRE_GSO feature is added for devices which support HW
GRE TSO offload. Currently none of the devices supports it, therefore GRE GSO
always falls back to software GSO.
[ Compute pkt_len before ip_local_out() invocation. -DaveM ]
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/gre.c | 118 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 82 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 1 | ||||
-rw-r--r-- | net/ipv4/udp.c | 3 |
5 files changed, 197 insertions, 8 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e6e5d8506336..e225a4e5b572 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, | |||
1287 | SKB_GSO_UDP | | 1287 | SKB_GSO_UDP | |
1288 | SKB_GSO_DODGY | | 1288 | SKB_GSO_DODGY | |
1289 | SKB_GSO_TCP_ECN | | 1289 | SKB_GSO_TCP_ECN | |
1290 | SKB_GSO_GRE | | ||
1290 | 0))) | 1291 | 0))) |
1291 | goto out; | 1292 | goto out; |
1292 | 1293 | ||
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c76..7a4c710c4cdd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/in.h> | 19 | #include <linux/in.h> |
20 | #include <linux/ip.h> | 20 | #include <linux/ip.h> |
21 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
22 | #include <linux/if_tunnel.h> | ||
22 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
23 | #include <net/protocol.h> | 24 | #include <net/protocol.h> |
24 | #include <net/gre.h> | 25 | #include <net/gre.h> |
@@ -26,6 +27,11 @@ | |||
26 | 27 | ||
27 | static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; | 28 | static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; |
28 | static DEFINE_SPINLOCK(gre_proto_lock); | 29 | static DEFINE_SPINLOCK(gre_proto_lock); |
30 | struct gre_base_hdr { | ||
31 | __be16 flags; | ||
32 | __be16 protocol; | ||
33 | }; | ||
34 | #define GRE_HEADER_SECTION 4 | ||
29 | 35 | ||
30 | int gre_add_protocol(const struct gre_protocol *proto, u8 version) | 36 | int gre_add_protocol(const struct gre_protocol *proto, u8 version) |
31 | { | 37 | { |
@@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info) | |||
112 | rcu_read_unlock(); | 118 | rcu_read_unlock(); |
113 | } | 119 | } |
114 | 120 | ||
121 | static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | ||
122 | netdev_features_t features) | ||
123 | { | ||
124 | struct sk_buff *segs = ERR_PTR(-EINVAL); | ||
125 | netdev_features_t enc_features; | ||
126 | int ghl = GRE_HEADER_SECTION; | ||
127 | struct gre_base_hdr *greh; | ||
128 | int mac_len = skb->mac_len; | ||
129 | int tnl_hlen; | ||
130 | bool csum; | ||
131 | |||
132 | if (unlikely(skb_shinfo(skb)->gso_type & | ||
133 | ~(SKB_GSO_TCPV4 | | ||
134 | SKB_GSO_TCPV6 | | ||
135 | SKB_GSO_UDP | | ||
136 | SKB_GSO_DODGY | | ||
137 | SKB_GSO_TCP_ECN | | ||
138 | SKB_GSO_GRE))) | ||
139 | goto out; | ||
140 | |||
141 | if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) | ||
142 | goto out; | ||
143 | |||
144 | greh = (struct gre_base_hdr *)skb_transport_header(skb); | ||
145 | |||
146 | if (greh->flags & GRE_KEY) | ||
147 | ghl += GRE_HEADER_SECTION; | ||
148 | if (greh->flags & GRE_SEQ) | ||
149 | ghl += GRE_HEADER_SECTION; | ||
150 | if (greh->flags & GRE_CSUM) { | ||
151 | ghl += GRE_HEADER_SECTION; | ||
152 | csum = true; | ||
153 | } else | ||
154 | csum = false; | ||
155 | |||
156 | /* setup inner skb. */ | ||
157 | if (greh->protocol == htons(ETH_P_TEB)) { | ||
158 | struct ethhdr *eth = eth_hdr(skb); | ||
159 | skb->protocol = eth->h_proto; | ||
160 | } else { | ||
161 | skb->protocol = greh->protocol; | ||
162 | } | ||
163 | |||
164 | skb->encapsulation = 0; | ||
165 | |||
166 | if (unlikely(!pskb_may_pull(skb, ghl))) | ||
167 | goto out; | ||
168 | __skb_pull(skb, ghl); | ||
169 | skb_reset_mac_header(skb); | ||
170 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | ||
171 | skb->mac_len = skb_inner_network_offset(skb); | ||
172 | |||
173 | /* segment inner packet. */ | ||
174 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); | ||
175 | segs = skb_mac_gso_segment(skb, enc_features); | ||
176 | if (!segs || IS_ERR(segs)) | ||
177 | goto out; | ||
178 | |||
179 | skb = segs; | ||
180 | tnl_hlen = skb_tnl_header_len(skb); | ||
181 | do { | ||
182 | __skb_push(skb, ghl); | ||
183 | if (csum) { | ||
184 | __be32 *pcsum; | ||
185 | |||
186 | if (skb_has_shared_frag(skb)) { | ||
187 | int err; | ||
188 | |||
189 | err = __skb_linearize(skb); | ||
190 | if (err) { | ||
191 | kfree_skb(segs); | ||
192 | segs = ERR_PTR(err); | ||
193 | goto out; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | greh = (struct gre_base_hdr *)(skb->data); | ||
198 | pcsum = (__be32 *)(greh + 1); | ||
199 | *pcsum = 0; | ||
200 | *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); | ||
201 | } | ||
202 | __skb_push(skb, tnl_hlen - ghl); | ||
203 | |||
204 | skb_reset_mac_header(skb); | ||
205 | skb_set_network_header(skb, mac_len); | ||
206 | skb->mac_len = mac_len; | ||
207 | } while ((skb = skb->next)); | ||
208 | out: | ||
209 | return segs; | ||
210 | } | ||
211 | |||
212 | static int gre_gso_send_check(struct sk_buff *skb) | ||
213 | { | ||
214 | if (!skb->encapsulation) | ||
215 | return -EINVAL; | ||
216 | return 0; | ||
217 | } | ||
218 | |||
115 | static const struct net_protocol net_gre_protocol = { | 219 | static const struct net_protocol net_gre_protocol = { |
116 | .handler = gre_rcv, | 220 | .handler = gre_rcv, |
117 | .err_handler = gre_err, | 221 | .err_handler = gre_err, |
118 | .netns_ok = 1, | 222 | .netns_ok = 1, |
119 | }; | 223 | }; |
120 | 224 | ||
225 | static const struct net_offload gre_offload = { | ||
226 | .callbacks = { | ||
227 | .gso_send_check = gre_gso_send_check, | ||
228 | .gso_segment = gre_gso_segment, | ||
229 | }, | ||
230 | }; | ||
231 | |||
121 | static int __init gre_init(void) | 232 | static int __init gre_init(void) |
122 | { | 233 | { |
123 | pr_info("GRE over IPv4 demultiplexor driver\n"); | 234 | pr_info("GRE over IPv4 demultiplexor driver\n"); |
@@ -127,11 +238,18 @@ static int __init gre_init(void) | |||
127 | return -EAGAIN; | 238 | return -EAGAIN; |
128 | } | 239 | } |
129 | 240 | ||
241 | if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { | ||
242 | pr_err("can't add protocol offload\n"); | ||
243 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); | ||
244 | return -EAGAIN; | ||
245 | } | ||
246 | |||
130 | return 0; | 247 | return 0; |
131 | } | 248 | } |
132 | 249 | ||
133 | static void __exit gre_exit(void) | 250 | static void __exit gre_exit(void) |
134 | { | 251 | { |
252 | inet_del_offload(&gre_offload, IPPROTO_GRE); | ||
135 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); | 253 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); |
136 | } | 254 | } |
137 | 255 | ||
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 00a14b9864ea..a56f1182c176 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -735,8 +735,33 @@ drop: | |||
735 | return 0; | 735 | return 0; |
736 | } | 736 | } |
737 | 737 | ||
738 | static struct sk_buff *handle_offloads(struct sk_buff *skb) | ||
739 | { | ||
740 | int err; | ||
741 | |||
742 | if (skb_is_gso(skb)) { | ||
743 | err = skb_unclone(skb, GFP_ATOMIC); | ||
744 | if (unlikely(err)) | ||
745 | goto error; | ||
746 | skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; | ||
747 | return skb; | ||
748 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
749 | err = skb_checksum_help(skb); | ||
750 | if (unlikely(err)) | ||
751 | goto error; | ||
752 | } | ||
753 | skb->ip_summed = CHECKSUM_NONE; | ||
754 | |||
755 | return skb; | ||
756 | |||
757 | error: | ||
758 | kfree_skb(skb); | ||
759 | return ERR_PTR(err); | ||
760 | } | ||
761 | |||
738 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 762 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
739 | { | 763 | { |
764 | struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); | ||
740 | struct ip_tunnel *tunnel = netdev_priv(dev); | 765 | struct ip_tunnel *tunnel = netdev_priv(dev); |
741 | const struct iphdr *old_iph; | 766 | const struct iphdr *old_iph; |
742 | const struct iphdr *tiph; | 767 | const struct iphdr *tiph; |
@@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
751 | __be32 dst; | 776 | __be32 dst; |
752 | int mtu; | 777 | int mtu; |
753 | u8 ttl; | 778 | u8 ttl; |
779 | int err; | ||
780 | int pkt_len; | ||
754 | 781 | ||
755 | if (skb->ip_summed == CHECKSUM_PARTIAL && | 782 | skb = handle_offloads(skb); |
756 | skb_checksum_help(skb)) | 783 | if (IS_ERR(skb)) { |
757 | goto tx_error; | 784 | dev->stats.tx_dropped++; |
785 | return NETDEV_TX_OK; | ||
786 | } | ||
787 | |||
788 | if (!skb->encapsulation) { | ||
789 | skb_reset_inner_headers(skb); | ||
790 | skb->encapsulation = 1; | ||
791 | } | ||
758 | 792 | ||
759 | old_iph = ip_hdr(skb); | 793 | old_iph = ip_hdr(skb); |
760 | 794 | ||
@@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
855 | if (skb->protocol == htons(ETH_P_IP)) { | 889 | if (skb->protocol == htons(ETH_P_IP)) { |
856 | df |= (old_iph->frag_off&htons(IP_DF)); | 890 | df |= (old_iph->frag_off&htons(IP_DF)); |
857 | 891 | ||
858 | if ((old_iph->frag_off&htons(IP_DF)) && | 892 | if (!skb_is_gso(skb) && |
893 | (old_iph->frag_off&htons(IP_DF)) && | ||
859 | mtu < ntohs(old_iph->tot_len)) { | 894 | mtu < ntohs(old_iph->tot_len)) { |
860 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 895 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
861 | ip_rt_put(rt); | 896 | ip_rt_put(rt); |
@@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
875 | } | 910 | } |
876 | } | 911 | } |
877 | 912 | ||
878 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { | 913 | if (!skb_is_gso(skb) && |
914 | mtu >= IPV6_MIN_MTU && | ||
915 | mtu < skb->len - tunnel->hlen + gre_hlen) { | ||
879 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 916 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
880 | ip_rt_put(rt); | 917 | ip_rt_put(rt); |
881 | goto tx_error; | 918 | goto tx_error; |
@@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
936 | iph->daddr = fl4.daddr; | 973 | iph->daddr = fl4.daddr; |
937 | iph->saddr = fl4.saddr; | 974 | iph->saddr = fl4.saddr; |
938 | iph->ttl = ttl; | 975 | iph->ttl = ttl; |
976 | iph->id = 0; | ||
939 | 977 | ||
940 | if (ttl == 0) { | 978 | if (ttl == 0) { |
941 | if (skb->protocol == htons(ETH_P_IP)) | 979 | if (skb->protocol == htons(ETH_P_IP)) |
@@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
964 | *ptr = tunnel->parms.o_key; | 1002 | *ptr = tunnel->parms.o_key; |
965 | ptr--; | 1003 | ptr--; |
966 | } | 1004 | } |
967 | if (tunnel->parms.o_flags&GRE_CSUM) { | 1005 | /* Skip GRE checksum if skb is getting offloaded. */ |
1006 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && | ||
1007 | (tunnel->parms.o_flags&GRE_CSUM)) { | ||
968 | int offset = skb_transport_offset(skb); | 1008 | int offset = skb_transport_offset(skb); |
969 | 1009 | ||
1010 | if (skb_has_shared_frag(skb)) { | ||
1011 | err = __skb_linearize(skb); | ||
1012 | if (err) { | ||
1013 | ip_rt_put(rt); | ||
1014 | goto tx_error; | ||
1015 | } | ||
1016 | } | ||
1017 | |||
970 | *ptr = 0; | 1018 | *ptr = 0; |
971 | *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, | 1019 | *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, |
972 | skb->len - offset, | 1020 | skb->len - offset, |
@@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
974 | } | 1022 | } |
975 | } | 1023 | } |
976 | 1024 | ||
977 | iptunnel_xmit(skb, dev); | 1025 | nf_reset(skb); |
1026 | |||
1027 | pkt_len = skb->len - skb_transport_offset(skb); | ||
1028 | err = ip_local_out(skb); | ||
1029 | if (likely(net_xmit_eval(err) == 0)) { | ||
1030 | u64_stats_update_begin(&tstats->syncp); | ||
1031 | tstats->tx_bytes += pkt_len; | ||
1032 | tstats->tx_packets++; | ||
1033 | u64_stats_update_end(&tstats->syncp); | ||
1034 | } else { | ||
1035 | dev->stats.tx_errors++; | ||
1036 | dev->stats.tx_aborted_errors++; | ||
1037 | } | ||
978 | return NETDEV_TX_OK; | 1038 | return NETDEV_TX_OK; |
979 | 1039 | ||
980 | #if IS_ENABLED(CONFIG_IPV6) | 1040 | #if IS_ENABLED(CONFIG_IPV6) |
@@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
1044 | mtu = 68; | 1104 | mtu = 68; |
1045 | 1105 | ||
1046 | tunnel->hlen = addend; | 1106 | tunnel->hlen = addend; |
1107 | /* TCP offload with GRE SEQ is not supported. */ | ||
1108 | if (!(tunnel->parms.o_flags & GRE_SEQ)) { | ||
1109 | dev->features |= NETIF_F_GSO_SOFTWARE; | ||
1110 | dev->hw_features |= NETIF_F_GSO_SOFTWARE; | ||
1111 | } | ||
1047 | 1112 | ||
1048 | return mtu; | 1113 | return mtu; |
1049 | } | 1114 | } |
@@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev) | |||
1593 | 1658 | ||
1594 | dev->iflink = 0; | 1659 | dev->iflink = 0; |
1595 | dev->features |= NETIF_F_NETNS_LOCAL; | 1660 | dev->features |= NETIF_F_NETNS_LOCAL; |
1661 | |||
1662 | dev->features |= GRE_FEATURES; | ||
1663 | dev->hw_features |= GRE_FEATURES; | ||
1596 | } | 1664 | } |
1597 | 1665 | ||
1598 | static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], | 1666 | static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f0bedb8622f..7a5ba48c2cc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, | |||
3043 | SKB_GSO_DODGY | | 3043 | SKB_GSO_DODGY | |
3044 | SKB_GSO_TCP_ECN | | 3044 | SKB_GSO_TCP_ECN | |
3045 | SKB_GSO_TCPV6 | | 3045 | SKB_GSO_TCPV6 | |
3046 | SKB_GSO_GRE | | ||
3046 | 0) || | 3047 | 0) || |
3047 | !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) | 3048 | !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) |
3048 | goto out; | 3049 | goto out; |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6791aac06ea9..39a5e7a9a77f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | |||
2305 | /* Packet is from an untrusted source, reset gso_segs. */ | 2305 | /* Packet is from an untrusted source, reset gso_segs. */ |
2306 | int type = skb_shinfo(skb)->gso_type; | 2306 | int type = skb_shinfo(skb)->gso_type; |
2307 | 2307 | ||
2308 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || | 2308 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | |
2309 | SKB_GSO_GRE) || | ||
2309 | !(type & (SKB_GSO_UDP)))) | 2310 | !(type & (SKB_GSO_UDP)))) |
2310 | goto out; | 2311 | goto out; |
2311 | 2312 | ||