aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNikolay Aleksandrov <nikolay@redhat.com>2014-05-28 12:03:48 -0400
committerDavid S. Miller <davem@davemloft.net>2014-06-01 22:39:13 -0400
commit4b9b1cdf83c4facba89e0646aeac8ead679851b8 (patch)
treecf95c72edef6a1a58b78e28437e0b102170c8031 /net
parent6ce995c6f466e59a5f16e6eae8b265c1d13bb202 (diff)
net: fix wrong mac_len calculation for vlans
After 1e785f48d29a ("net: Start with correct mac_len in skb_network_protocol") skb->mac_len is used as a start of the calculation in skb_network_protocol() but that is not always correct. If skb->protocol == 8021Q/AD, usually the vlan header is already inserted in the skb (i.e. vlan reorder hdr == 0). Usually when the packet enters dev_hard_xmit it has mac_len == 0 so we take 2 bytes from the destination mac address (skb->data + VLAN_HLEN) as a type in skb_network_protocol() and return vlan_depth == 4. In the case where TSO is off, then the mac_len is set but it's == 18 (ETH_HLEN + VLAN_HLEN), so skb_network_protocol() returns a type from inside the packet and offset == 22. Also make vlan_depth unsigned as suggested before. As suggested by Eric Dumazet, move the while() loop in the if() so we can avoid additional testing in fast path. Here are few netperf tests + debug printk's to illustrate: cat netperf.tso-on.reorder-on.bugged - Vlan -> device (reorder on, default, this case is okay) MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 7111.54 [ 81.605435] skb->len 65226 skb->gso_size 1448 skb->proto 0x800 skb->mac_len 0 vlan_depth 0 type 0x800 - Vlan -> device (reorder off, bad) cat netperf.tso-on.reorder-off.bugged MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 241.35 [ 204.578332] skb->len 1518 skb->gso_size 0 skb->proto 0x8100 skb->mac_len 0 vlan_depth 4 type 0x5301 0x5301 are the last two bytes of the destination mac. And if we stop TSO, we may get even the following: [ 83.343156] skb->len 2966 skb->gso_size 1448 skb->proto 0x8100 skb->mac_len 18 vlan_depth 22 type 0xb84 Because mac_len already accounts for VLAN_HLEN. After the fix: cat netperf.tso-on.reorder-off.fixed MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.01 5001.46 [ 81.888489] skb->len 65230 skb->gso_size 1448 skb->proto 0x8100 skb->mac_len 0 vlan_depth 18 type 0x800 CC: Vlad Yasevich <vyasevic@redhat.com> CC: Eric Dumazet <eric.dumazet@gmail.com> CC: Daniel Borkman <dborkman@redhat.com> CC: David S. Miller <davem@davemloft.net> Fixes:1e785f48d29a ("net: Start with correct mac_len in skb_network_protocol") Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c35
1 files changed, 25 insertions, 10 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 9abc503b19b7..fb8b0546485b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2283,8 +2283,8 @@ EXPORT_SYMBOL(skb_checksum_help);
2283 2283
2284__be16 skb_network_protocol(struct sk_buff *skb, int *depth) 2284__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2285{ 2285{
2286 unsigned int vlan_depth = skb->mac_len;
2286 __be16 type = skb->protocol; 2287 __be16 type = skb->protocol;
2287 int vlan_depth = skb->mac_len;
2288 2288
2289 /* Tunnel gso handlers can set protocol to ethernet. */ 2289 /* Tunnel gso handlers can set protocol to ethernet. */
2290 if (type == htons(ETH_P_TEB)) { 2290 if (type == htons(ETH_P_TEB)) {
@@ -2297,15 +2297,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2297 type = eth->h_proto; 2297 type = eth->h_proto;
2298 } 2298 }
2299 2299
2300 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { 2300 /* if skb->protocol is 802.1Q/AD then the header should already be
2301 struct vlan_hdr *vh; 2301 * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
2302 2302 * ETH_HLEN otherwise
2303 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2303 */
2304 return 0; 2304 if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2305 2305 if (vlan_depth) {
2306 vh = (struct vlan_hdr *)(skb->data + vlan_depth); 2306 if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN)))
2307 type = vh->h_vlan_encapsulated_proto; 2307 return 0;
2308 vlan_depth += VLAN_HLEN; 2308 vlan_depth -= VLAN_HLEN;
2309 } else {
2310 vlan_depth = ETH_HLEN;
2311 }
2312 do {
2313 struct vlan_hdr *vh;
2314
2315 if (unlikely(!pskb_may_pull(skb,
2316 vlan_depth + VLAN_HLEN)))
2317 return 0;
2318
2319 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2320 type = vh->h_vlan_encapsulated_proto;
2321 vlan_depth += VLAN_HLEN;
2322 } while (type == htons(ETH_P_8021Q) ||
2323 type == htons(ETH_P_8021AD));
2309 } 2324 }
2310 2325
2311 *depth = vlan_depth; 2326 *depth = vlan_depth;