diff options
author | Patrick McHardy <kaber@trash.net> | 2012-08-26 13:13:55 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2012-08-26 13:13:55 -0400 |
commit | 5f2d04f1f9b52604fca6ee08a77972c0df67e082 (patch) | |
tree | f4caa0aa602f6044f1fc20c6dfa89527183803d1 | |
parent | 0fa7fa98dbcc2789409ed24e885485e645803d7f (diff) |
ipv4: fix path MTU discovery with connection tracking
IPv4 conntrack defragments incoming packets at the PRE_ROUTING hook and
(in case of forwarded packets) refragments them at POST_ROUTING
independent of the IP_DF flag. Refragmentation uses the dst_mtu() of
the local route without caring about the original fragment sizes,
thereby breaking PMTUD.
This patch fixes this by keeping track of the largest received fragment
with IP_DF set and generating an ICMP fragmentation required error during
refragmentation if that size exceeds the MTU.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_frag.h | 2 | ||||
-rw-r--r-- | include/net/ip.h | 2 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 8 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 4 |
4 files changed, 14 insertions, 2 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 2431cf83aeca..5098ee7b7e0e 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h | |||
@@ -29,6 +29,8 @@ struct inet_frag_queue { | |||
29 | #define INET_FRAG_COMPLETE 4 | 29 | #define INET_FRAG_COMPLETE 4 |
30 | #define INET_FRAG_FIRST_IN 2 | 30 | #define INET_FRAG_FIRST_IN 2 |
31 | #define INET_FRAG_LAST_IN 1 | 31 | #define INET_FRAG_LAST_IN 1 |
32 | |||
33 | u16 max_size; | ||
32 | }; | 34 | }; |
33 | 35 | ||
34 | #define INETFRAGS_HASHSZ 64 | 36 | #define INETFRAGS_HASHSZ 64 |
diff --git a/include/net/ip.h b/include/net/ip.h index 5a5d84d3d2c6..0707fb9551aa 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -42,6 +42,8 @@ struct inet_skb_parm { | |||
42 | #define IPSKB_XFRM_TRANSFORMED 4 | 42 | #define IPSKB_XFRM_TRANSFORMED 4 |
43 | #define IPSKB_FRAG_COMPLETE 8 | 43 | #define IPSKB_FRAG_COMPLETE 8 |
44 | #define IPSKB_REROUTED 16 | 44 | #define IPSKB_REROUTED 16 |
45 | |||
46 | u16 frag_max_size; | ||
45 | }; | 47 | }; |
46 | 48 | ||
47 | static inline unsigned int ip_hdrlen(const struct sk_buff *skb) | 49 | static inline unsigned int ip_hdrlen(const struct sk_buff *skb) |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409c..fa6a12c51066 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -523,6 +523,10 @@ found: | |||
523 | if (offset == 0) | 523 | if (offset == 0) |
524 | qp->q.last_in |= INET_FRAG_FIRST_IN; | 524 | qp->q.last_in |= INET_FRAG_FIRST_IN; |
525 | 525 | ||
526 | if (ip_hdr(skb)->frag_off & htons(IP_DF) && | ||
527 | skb->len + ihl > qp->q.max_size) | ||
528 | qp->q.max_size = skb->len + ihl; | ||
529 | |||
526 | if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | 530 | if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && |
527 | qp->q.meat == qp->q.len) | 531 | qp->q.meat == qp->q.len) |
528 | return ip_frag_reasm(qp, prev, dev); | 532 | return ip_frag_reasm(qp, prev, dev); |
@@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
646 | head->next = NULL; | 650 | head->next = NULL; |
647 | head->dev = dev; | 651 | head->dev = dev; |
648 | head->tstamp = qp->q.stamp; | 652 | head->tstamp = qp->q.stamp; |
653 | IPCB(head)->frag_max_size = qp->q.max_size; | ||
649 | 654 | ||
650 | iph = ip_hdr(head); | 655 | iph = ip_hdr(head); |
651 | iph->frag_off = 0; | 656 | /* max_size != 0 implies at least one fragment had IP_DF set */ |
657 | iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; | ||
652 | iph->tot_len = htons(len); | 658 | iph->tot_len = htons(len); |
653 | iph->tos |= ecn; | 659 | iph->tos |= ecn; |
654 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 660 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c196d749daf2..a5beab1dc958 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
467 | 467 | ||
468 | iph = ip_hdr(skb); | 468 | iph = ip_hdr(skb); |
469 | 469 | ||
470 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 470 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || |
471 | (IPCB(skb)->frag_max_size && | ||
472 | IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { | ||
471 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 473 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
472 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 474 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
473 | htonl(ip_skb_dst_mtu(skb))); | 475 | htonl(ip_skb_dst_mtu(skb))); |