diff options
author | Wei-Chun Chao <weichunc@plumgrid.com> | 2013-12-26 16:10:22 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-01-02 19:06:47 -0500 |
commit | 7a7ffbabf99445704be01bff5d7e360da908cf8e (patch) | |
tree | 3b4f71699307ac2853a8fdffde60fb7055cb1b04 | |
parent | 619a60ee04be33238721a15c1f9704a2a515a33e (diff) |
ipv4: fix tunneled VM traffic over hw VXLAN/GRE GSO NIC
VM-to-VM GSO traffic is broken if it goes through a VXLAN or GRE
tunnel and the physical NIC on the host supports hardware VXLAN/GRE
GSO offload (e.g. bnx2x and next-gen mlx4).
Two issues:
(VXLAN) VM traffic has SKB_GSO_DODGY and SKB_GSO_UDP_TUNNEL set, with
SKB_GSO_TCP/UDP set depending on the inner protocol. The GSO header
integrity check fails in udp4_ufo_fragment() if the inner protocol is
TCP. Also, gso_segs is calculated incorrectly from skb->len, which
includes the tunnel header. Fix: the robust check should only be
applied to the inner packet.
(VXLAN & GRE) Once the GSO header integrity check passes, NULL segs
is returned and the original skb is sent to the hardware. However, the
tunnel header has already been pulled. Fix: the tunnel header needs to
be restored so that the hardware can perform GSO properly on the
original packet.
Signed-off-by: Wei-Chun Chao <weichunc@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 13 | ||||
-rw-r--r-- | net/ipv4/gre_offload.c | 11 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 37 |
4 files changed, 44 insertions, 23 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7514b9c37a39..5faaadb0c74f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -3017,6 +3017,19 @@ static inline void netif_set_gso_max_size(struct net_device *dev, | |||
3017 | dev->gso_max_size = size; | 3017 | dev->gso_max_size = size; |
3018 | } | 3018 | } |
3019 | 3019 | ||
3020 | static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, | ||
3021 | int pulled_hlen, u16 mac_offset, | ||
3022 | int mac_len) | ||
3023 | { | ||
3024 | skb->protocol = protocol; | ||
3025 | skb->encapsulation = 1; | ||
3026 | skb_push(skb, pulled_hlen); | ||
3027 | skb_reset_transport_header(skb); | ||
3028 | skb->mac_header = mac_offset; | ||
3029 | skb->network_header = skb->mac_header + mac_len; | ||
3030 | skb->mac_len = mac_len; | ||
3031 | } | ||
3032 | |||
3020 | static inline bool netif_is_macvlan(struct net_device *dev) | 3033 | static inline bool netif_is_macvlan(struct net_device *dev) |
3021 | { | 3034 | { |
3022 | return dev->priv_flags & IFF_MACVLAN; | 3035 | return dev->priv_flags & IFF_MACVLAN; |
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e5d436188464..2cd02f32f99f 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c | |||
@@ -28,6 +28,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | |||
28 | netdev_features_t enc_features; | 28 | netdev_features_t enc_features; |
29 | int ghl = GRE_HEADER_SECTION; | 29 | int ghl = GRE_HEADER_SECTION; |
30 | struct gre_base_hdr *greh; | 30 | struct gre_base_hdr *greh; |
31 | u16 mac_offset = skb->mac_header; | ||
31 | int mac_len = skb->mac_len; | 32 | int mac_len = skb->mac_len; |
32 | __be16 protocol = skb->protocol; | 33 | __be16 protocol = skb->protocol; |
33 | int tnl_hlen; | 34 | int tnl_hlen; |
@@ -58,13 +59,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | |||
58 | } else | 59 | } else |
59 | csum = false; | 60 | csum = false; |
60 | 61 | ||
62 | if (unlikely(!pskb_may_pull(skb, ghl))) | ||
63 | goto out; | ||
64 | |||
61 | /* setup inner skb. */ | 65 | /* setup inner skb. */ |
62 | skb->protocol = greh->protocol; | 66 | skb->protocol = greh->protocol; |
63 | skb->encapsulation = 0; | 67 | skb->encapsulation = 0; |
64 | 68 | ||
65 | if (unlikely(!pskb_may_pull(skb, ghl))) | ||
66 | goto out; | ||
67 | |||
68 | __skb_pull(skb, ghl); | 69 | __skb_pull(skb, ghl); |
69 | skb_reset_mac_header(skb); | 70 | skb_reset_mac_header(skb); |
70 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | 71 | skb_set_network_header(skb, skb_inner_network_offset(skb)); |
@@ -73,8 +74,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | |||
73 | /* segment inner packet. */ | 74 | /* segment inner packet. */ |
74 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); | 75 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); |
75 | segs = skb_mac_gso_segment(skb, enc_features); | 76 | segs = skb_mac_gso_segment(skb, enc_features); |
76 | if (!segs || IS_ERR(segs)) | 77 | if (!segs || IS_ERR(segs)) { |
78 | skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); | ||
77 | goto out; | 79 | goto out; |
80 | } | ||
78 | 81 | ||
79 | skb = segs; | 82 | skb = segs; |
80 | tnl_hlen = skb_tnl_header_len(skb); | 83 | tnl_hlen = skb_tnl_header_len(skb); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f140048334ce..a7e4729e974b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -2478,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | |||
2478 | netdev_features_t features) | 2478 | netdev_features_t features) |
2479 | { | 2479 | { |
2480 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2480 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2481 | u16 mac_offset = skb->mac_header; | ||
2481 | int mac_len = skb->mac_len; | 2482 | int mac_len = skb->mac_len; |
2482 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); | 2483 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); |
2483 | __be16 protocol = skb->protocol; | 2484 | __be16 protocol = skb->protocol; |
@@ -2497,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | |||
2497 | /* segment inner packet. */ | 2498 | /* segment inner packet. */ |
2498 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); | 2499 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); |
2499 | segs = skb_mac_gso_segment(skb, enc_features); | 2500 | segs = skb_mac_gso_segment(skb, enc_features); |
2500 | if (!segs || IS_ERR(segs)) | 2501 | if (!segs || IS_ERR(segs)) { |
2502 | skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, | ||
2503 | mac_len); | ||
2501 | goto out; | 2504 | goto out; |
2505 | } | ||
2502 | 2506 | ||
2503 | outer_hlen = skb_tnl_header_len(skb); | 2507 | outer_hlen = skb_tnl_header_len(skb); |
2504 | skb = segs; | 2508 | skb = segs; |
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 83206de2bc76..79c62bdcd3c5 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c | |||
@@ -41,6 +41,14 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | |||
41 | { | 41 | { |
42 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 42 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
43 | unsigned int mss; | 43 | unsigned int mss; |
44 | int offset; | ||
45 | __wsum csum; | ||
46 | |||
47 | if (skb->encapsulation && | ||
48 | skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { | ||
49 | segs = skb_udp_tunnel_segment(skb, features); | ||
50 | goto out; | ||
51 | } | ||
44 | 52 | ||
45 | mss = skb_shinfo(skb)->gso_size; | 53 | mss = skb_shinfo(skb)->gso_size; |
46 | if (unlikely(skb->len <= mss)) | 54 | if (unlikely(skb->len <= mss)) |
@@ -63,27 +71,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | |||
63 | goto out; | 71 | goto out; |
64 | } | 72 | } |
65 | 73 | ||
74 | /* Do software UFO. Complete and fill in the UDP checksum as | ||
75 | * HW cannot do checksum of UDP packets sent as multiple | ||
76 | * IP fragments. | ||
77 | */ | ||
78 | offset = skb_checksum_start_offset(skb); | ||
79 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
80 | offset += skb->csum_offset; | ||
81 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | ||
82 | skb->ip_summed = CHECKSUM_NONE; | ||
83 | |||
66 | /* Fragment the skb. IP headers of the fragments are updated in | 84 | /* Fragment the skb. IP headers of the fragments are updated in |
67 | * inet_gso_segment() | 85 | * inet_gso_segment() |
68 | */ | 86 | */ |
69 | if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) | 87 | segs = skb_segment(skb, features); |
70 | segs = skb_udp_tunnel_segment(skb, features); | ||
71 | else { | ||
72 | int offset; | ||
73 | __wsum csum; | ||
74 | |||
75 | /* Do software UFO. Complete and fill in the UDP checksum as | ||
76 | * HW cannot do checksum of UDP packets sent as multiple | ||
77 | * IP fragments. | ||
78 | */ | ||
79 | offset = skb_checksum_start_offset(skb); | ||
80 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
81 | offset += skb->csum_offset; | ||
82 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | ||
83 | skb->ip_summed = CHECKSUM_NONE; | ||
84 | |||
85 | segs = skb_segment(skb, features); | ||
86 | } | ||
87 | out: | 88 | out: |
88 | return segs; | 89 | return segs; |
89 | } | 90 | } |