diff options
author | Ed Swierk <eswierk@skyportsystems.com> | 2018-01-31 21:48:02 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-02-01 09:46:22 -0500 |
commit | 9382fe71c0058465e942a633869629929102843d (patch) | |
tree | a67c6d0859748ef1a443189f08113e2b855a691b | |
parent | 3aff3b4b986e51bcf4ab249e5d48d39596e0df6a (diff) |
openvswitch: Remove padding from packet before L3+ conntrack processing
IPv4 and IPv6 packets may arrive with lower-layer padding that is not
included in the L3 length. For example, a short IPv4 packet may have
up to 6 bytes of padding following the IP payload when received on an
Ethernet device with a minimum packet length of 64 bytes.
Higher-layer processing functions in netfilter (e.g. nf_ip_checksum()
and help() in nf_conntrack_ftp) assume skb->len reflects the length of
the L3 header and payload, rather than referring back to
ip_hdr->tot_len or ipv6_hdr->payload_len, and get confused by
lower-layer padding.
In the normal IPv4 receive path, ip_rcv() trims the packet to
ip_hdr->tot_len before invoking netfilter hooks. In the IPv6 receive
path, ip6_rcv() does the same using ipv6_hdr->payload_len. Similarly
in the br_netfilter receive path, br_validate_ipv4() and
br_validate_ipv6() trim the packet to the L3 length before invoking
netfilter hooks.
Currently in the OVS conntrack receive path, ovs_ct_execute() pulls
the skb to the L3 header but does not trim it to the L3 length before
calling nf_conntrack_in(NF_INET_PRE_ROUTING). When
nf_conntrack_proto_tcp encounters a packet with lower-layer padding,
nf_ip_checksum() fails, causing a "nf_ct_tcp: bad TCP checksum" log
message. While extra zero bytes don't affect the checksum, the length
in the IP pseudoheader does. That length is based on skb->len, and
without trimming, it doesn't match the length the sender used when
computing the checksum.
In ovs_ct_execute(), trim the skb to the L3 length before higher-layer
processing.
Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/openvswitch/conntrack.c | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 62f36cc938ca..c5904f629091 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c | |||
@@ -1098,6 +1098,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, | |||
1098 | return 0; | 1098 | return 0; |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | /* Trim the skb to the length specified by the IP/IPv6 header, | |
1102 | * removing any trailing lower-layer padding. This prepares the skb | |
1103 | * for higher-layer processing that assumes skb->len excludes padding | |
1104 | * (such as nf_ip_checksum). The caller needs to pull the skb to the | |
1105 | * network header, and ensure ip_hdr/ipv6_hdr points to valid data. | |
1106 | * Returns the pskb_trim_rcsum() result; if nonzero, 'skb' was freed here. */ | |
1107 | static int ovs_skb_network_trim(struct sk_buff *skb) | |
1108 | { | |
1109 | unsigned int len; /* target L3 length (header + payload) in bytes */ | |
1110 | int err; | |
1111 | | |
1112 | switch (skb->protocol) { | |
1113 | case htons(ETH_P_IP): | |
1114 | len = ntohs(ip_hdr(skb)->tot_len); /* tot_len is used as the full L3 length */ | |
1115 | break; | |
1116 | case htons(ETH_P_IPV6): | |
1117 | len = sizeof(struct ipv6hdr) /* fixed IPv6 header size is added to payload_len */ | |
1118 | + ntohs(ipv6_hdr(skb)->payload_len); /* NOTE(review): payload_len is 0 for jumbograms (RFC 2675), which would give len == sizeof(struct ipv6hdr) - confirm such packets cannot reach here */ | |
1119 | break; | |
1120 | default: | |
1121 | len = skb->len; /* neither IPv4 nor IPv6: request the current length */ | |
1122 | } | |
1123 | | |
1124 | err = pskb_trim_rcsum(skb, len); | |
1125 | if (err) | |
1126 | kfree_skb(skb); /* skb is freed on failure; the nonzero err tells the caller */ | |
1127 | | |
1128 | return err; | |
1129 | } | |
1130 | |||
1101 | /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero | 1131 | /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero |
1102 | * value if 'skb' is freed. | 1132 | * value if 'skb' is freed. |
1103 | */ | 1133 | */ |
@@ -1112,6 +1142,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, | |||
1112 | nh_ofs = skb_network_offset(skb); | 1142 | nh_ofs = skb_network_offset(skb); |
1113 | skb_pull_rcsum(skb, nh_ofs); | 1143 | skb_pull_rcsum(skb, nh_ofs); |
1114 | 1144 | ||
1145 | err = ovs_skb_network_trim(skb); | ||
1146 | if (err) | ||
1147 | return err; | ||
1148 | |||
1115 | if (key->ip.frag != OVS_FRAG_TYPE_NONE) { | 1149 | if (key->ip.frag != OVS_FRAG_TYPE_NONE) { |
1116 | err = handle_fragments(net, key, info->zone.id, skb); | 1150 | err = handle_fragments(net, key, info->zone.id, skb); |
1117 | if (err) | 1151 | if (err) |