diff options
author | David S. Miller <davem@davemloft.net> | 2014-05-07 15:49:16 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-05-07 15:49:16 -0400 |
commit | d32aebfd64c8c8649b39cd6789b141525cc9d7c0 (patch) | |
tree | 965fe4107b8a28f594832657610a74152e3f27a1 | |
parent | 418a31561d594a2b636c1e2fa94ecd9e1245abb1 (diff) | |
parent | c1e756bfcbcac838a86a23f3e4501b556a961e3c (diff) |
Merge branch 'gso_forward'
Florian Westphal says:
====================
net: ip: push gso skb forwarding handling down the stack
Turns out doing the segmentation in forwarding was not a bright idea:
there are corner cases where this has unintended side effects.
This patch pushes the segmentation downwards.
After this, the netif_skb_dev_features() function can be removed
again; it was only added to fetch the features of the output device,
and we can just use skb->dev after the pushdown.
Tested with the following setup:
host -> kvm_router -> kvm_host
  mtu 1500        mtu 1280
- 'host' has route to kvm_host with locked mtu of 1500
- gso/gro enabled on all interfaces
Did tests with all of the following combinations:
- netfilter conntrack off and on on kvm_router
- virtio-net and e1000 driver on kvm_router
- tcp and udp bulk xmit from host to kvm_host
For tcp, I added TCPMSS mangling on kvm_host to make it lie about tcp mss.
Also added a dummy '-t mangle -A POSTROUTING -p udp -f'
rule to make sure no udp fragments are seen in the 'conntrack on'
and 'virtio-net' case.
Also checked (with ping -M do -s 1400) that it still sends the expected
icmp error message when the size exceeds 1280.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 7 | ||||
-rw-r--r-- | net/core/dev.c | 22 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 50 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 51 |
4 files changed, 59 insertions, 71 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa6604..20e99efb1ca6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev); | |||
3180 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, | 3180 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, |
3181 | struct net_device *dev); | 3181 | struct net_device *dev); |
3182 | 3182 | ||
3183 | netdev_features_t netif_skb_dev_features(struct sk_buff *skb, | 3183 | netdev_features_t netif_skb_features(struct sk_buff *skb); |
3184 | const struct net_device *dev); | ||
3185 | static inline netdev_features_t netif_skb_features(struct sk_buff *skb) | ||
3186 | { | ||
3187 | return netif_skb_dev_features(skb, skb->dev); | ||
3188 | } | ||
3189 | 3184 | ||
3190 | static inline bool net_gso_ok(netdev_features_t features, int gso_type) | 3185 | static inline bool net_gso_ok(netdev_features_t features, int gso_type) |
3191 | { | 3186 | { |
diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..c619b8641337 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); | |||
2418 | * 2. No high memory really exists on this machine. | 2418 | * 2. No high memory really exists on this machine. |
2419 | */ | 2419 | */ |
2420 | 2420 | ||
2421 | static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) | 2421 | static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) |
2422 | { | 2422 | { |
2423 | #ifdef CONFIG_HIGHMEM | 2423 | #ifdef CONFIG_HIGHMEM |
2424 | int i; | 2424 | int i; |
@@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) | |||
2493 | } | 2493 | } |
2494 | 2494 | ||
2495 | static netdev_features_t harmonize_features(struct sk_buff *skb, | 2495 | static netdev_features_t harmonize_features(struct sk_buff *skb, |
2496 | const struct net_device *dev, | 2496 | netdev_features_t features) |
2497 | netdev_features_t features) | ||
2498 | { | 2497 | { |
2499 | int tmp; | 2498 | int tmp; |
2500 | 2499 | ||
2501 | if (skb->ip_summed != CHECKSUM_NONE && | 2500 | if (skb->ip_summed != CHECKSUM_NONE && |
2502 | !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { | 2501 | !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { |
2503 | features &= ~NETIF_F_ALL_CSUM; | 2502 | features &= ~NETIF_F_ALL_CSUM; |
2504 | } else if (illegal_highdma(dev, skb)) { | 2503 | } else if (illegal_highdma(skb->dev, skb)) { |
2505 | features &= ~NETIF_F_SG; | 2504 | features &= ~NETIF_F_SG; |
2506 | } | 2505 | } |
2507 | 2506 | ||
2508 | return features; | 2507 | return features; |
2509 | } | 2508 | } |
2510 | 2509 | ||
2511 | netdev_features_t netif_skb_dev_features(struct sk_buff *skb, | 2510 | netdev_features_t netif_skb_features(struct sk_buff *skb) |
2512 | const struct net_device *dev) | ||
2513 | { | 2511 | { |
2514 | __be16 protocol = skb->protocol; | 2512 | __be16 protocol = skb->protocol; |
2515 | netdev_features_t features = dev->features; | 2513 | netdev_features_t features = skb->dev->features; |
2516 | 2514 | ||
2517 | if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) | 2515 | if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) |
2518 | features &= ~NETIF_F_GSO_MASK; | 2516 | features &= ~NETIF_F_GSO_MASK; |
2519 | 2517 | ||
2520 | if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { | 2518 | if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { |
2521 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | 2519 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; |
2522 | protocol = veh->h_vlan_encapsulated_proto; | 2520 | protocol = veh->h_vlan_encapsulated_proto; |
2523 | } else if (!vlan_tx_tag_present(skb)) { | 2521 | } else if (!vlan_tx_tag_present(skb)) { |
2524 | return harmonize_features(skb, dev, features); | 2522 | return harmonize_features(skb, features); |
2525 | } | 2523 | } |
2526 | 2524 | ||
2527 | features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | | 2525 | features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | |
2528 | NETIF_F_HW_VLAN_STAG_TX); | 2526 | NETIF_F_HW_VLAN_STAG_TX); |
2529 | 2527 | ||
2530 | if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) | 2528 | if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) |
@@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, | |||
2532 | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | | 2530 | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | |
2533 | NETIF_F_HW_VLAN_STAG_TX; | 2531 | NETIF_F_HW_VLAN_STAG_TX; |
2534 | 2532 | ||
2535 | return harmonize_features(skb, dev, features); | 2533 | return harmonize_features(skb, features); |
2536 | } | 2534 | } |
2537 | EXPORT_SYMBOL(netif_skb_dev_features); | 2535 | EXPORT_SYMBOL(netif_skb_features); |
2538 | 2536 | ||
2539 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 2537 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
2540 | struct netdev_queue *txq) | 2538 | struct netdev_queue *txq) |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index c29ae8371e44..6f111e48e11c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) | |||
56 | return true; | 56 | return true; |
57 | } | 57 | } |
58 | 58 | ||
59 | static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) | ||
60 | { | ||
61 | unsigned int mtu; | ||
62 | |||
63 | if (skb->local_df || !skb_is_gso(skb)) | ||
64 | return false; | ||
65 | |||
66 | mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); | ||
67 | |||
68 | /* if seglen > mtu, do software segmentation for IP fragmentation on | ||
69 | * output. DF bit cannot be set since ip_forward would have sent | ||
70 | * icmp error. | ||
71 | */ | ||
72 | return skb_gso_network_seglen(skb) > mtu; | ||
73 | } | ||
74 | |||
75 | /* called if GSO skb needs to be fragmented on forward */ | ||
76 | static int ip_forward_finish_gso(struct sk_buff *skb) | ||
77 | { | ||
78 | struct dst_entry *dst = skb_dst(skb); | ||
79 | netdev_features_t features; | ||
80 | struct sk_buff *segs; | ||
81 | int ret = 0; | ||
82 | |||
83 | features = netif_skb_dev_features(skb, dst->dev); | ||
84 | segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); | ||
85 | if (IS_ERR(segs)) { | ||
86 | kfree_skb(skb); | ||
87 | return -ENOMEM; | ||
88 | } | ||
89 | |||
90 | consume_skb(skb); | ||
91 | |||
92 | do { | ||
93 | struct sk_buff *nskb = segs->next; | ||
94 | int err; | ||
95 | |||
96 | segs->next = NULL; | ||
97 | err = dst_output(segs); | ||
98 | |||
99 | if (err && ret == 0) | ||
100 | ret = err; | ||
101 | segs = nskb; | ||
102 | } while (segs); | ||
103 | |||
104 | return ret; | ||
105 | } | ||
106 | 59 | ||
107 | static int ip_forward_finish(struct sk_buff *skb) | 60 | static int ip_forward_finish(struct sk_buff *skb) |
108 | { | 61 | { |
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb) | |||
114 | if (unlikely(opt->optlen)) | 67 | if (unlikely(opt->optlen)) |
115 | ip_forward_options(skb); | 68 | ip_forward_options(skb); |
116 | 69 | ||
117 | if (ip_gso_exceeds_dst_mtu(skb)) | ||
118 | return ip_forward_finish_gso(skb); | ||
119 | |||
120 | return dst_output(skb); | 70 | return dst_output(skb); |
121 | } | 71 | } |
122 | 72 | ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1cbeba5edff9..a52f50187b54 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
211 | return -EINVAL; | 211 | return -EINVAL; |
212 | } | 212 | } |
213 | 213 | ||
214 | static int ip_finish_output_gso(struct sk_buff *skb) | ||
215 | { | ||
216 | netdev_features_t features; | ||
217 | struct sk_buff *segs; | ||
218 | int ret = 0; | ||
219 | |||
220 | /* common case: locally created skb or seglen is <= mtu */ | ||
221 | if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || | ||
222 | skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) | ||
223 | return ip_finish_output2(skb); | ||
224 | |||
225 | /* Slowpath - GSO segment length is exceeding the dst MTU. | ||
226 | * | ||
227 | * This can happen in two cases: | ||
228 | * 1) TCP GRO packet, DF bit not set | ||
229 | * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly | ||
230 | * from host network stack. | ||
231 | */ | ||
232 | features = netif_skb_features(skb); | ||
233 | segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); | ||
234 | if (IS_ERR(segs)) { | ||
235 | kfree_skb(skb); | ||
236 | return -ENOMEM; | ||
237 | } | ||
238 | |||
239 | consume_skb(skb); | ||
240 | |||
241 | do { | ||
242 | struct sk_buff *nskb = segs->next; | ||
243 | int err; | ||
244 | |||
245 | segs->next = NULL; | ||
246 | err = ip_fragment(segs, ip_finish_output2); | ||
247 | |||
248 | if (err && ret == 0) | ||
249 | ret = err; | ||
250 | segs = nskb; | ||
251 | } while (segs); | ||
252 | |||
253 | return ret; | ||
254 | } | ||
255 | |||
214 | static int ip_finish_output(struct sk_buff *skb) | 256 | static int ip_finish_output(struct sk_buff *skb) |
215 | { | 257 | { |
216 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) | 258 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) |
@@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb) | |||
220 | return dst_output(skb); | 262 | return dst_output(skb); |
221 | } | 263 | } |
222 | #endif | 264 | #endif |
223 | if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) | 265 | if (skb_is_gso(skb)) |
266 | return ip_finish_output_gso(skb); | ||
267 | |||
268 | if (skb->len > ip_skb_dst_mtu(skb)) | ||
224 | return ip_fragment(skb, ip_finish_output2); | 269 | return ip_fragment(skb, ip_finish_output2); |
225 | else | 270 | |
226 | return ip_finish_output2(skb); | 271 | return ip_finish_output2(skb); |
227 | } | 272 | } |
228 | 273 | ||
229 | int ip_mc_output(struct sock *sk, struct sk_buff *skb) | 274 | int ip_mc_output(struct sock *sk, struct sk_buff *skb) |