aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Patrick McHardy <kaber@trash.net> 2012-08-26 13:13:55 -0400
committer Patrick McHardy <kaber@trash.net> 2012-08-26 13:13:55 -0400
commit 5f2d04f1f9b52604fca6ee08a77972c0df67e082 (patch)
tree f4caa0aa602f6044f1fc20c6dfa89527183803d1
parent 0fa7fa98dbcc2789409ed24e885485e645803d7f (diff)
ipv4: fix path MTU discovery with connection tracking
IPv4 conntrack defragments incoming packets at the PRE_ROUTING hook and (in case of forwarded packets) refragments them at POST_ROUTING independent of the IP_DF flag. Refragmentation uses the dst_mtu() of the local route without caring about the original fragment sizes, thereby breaking PMTUD.

This patch fixes this by keeping track of the largest received fragment with IP_DF set and generating an ICMP fragmentation required error during refragmentation if that size exceeds the MTU.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/inet_frag.h | 2
-rw-r--r-- include/net/ip.h | 2
-rw-r--r-- net/ipv4/ip_fragment.c | 8
-rw-r--r-- net/ipv4/ip_output.c | 4
4 files changed, 14 insertions, 2 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 2431cf83aeca..5098ee7b7e0e 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -29,6 +29,8 @@ struct inet_frag_queue {
29#define INET_FRAG_COMPLETE 4 29#define INET_FRAG_COMPLETE 4
30#define INET_FRAG_FIRST_IN 2 30#define INET_FRAG_FIRST_IN 2
31#define INET_FRAG_LAST_IN 1 31#define INET_FRAG_LAST_IN 1
32
33 u16 max_size;
32}; 34};
33 35
34#define INETFRAGS_HASHSZ 64 36#define INETFRAGS_HASHSZ 64
diff --git a/include/net/ip.h b/include/net/ip.h
index 5a5d84d3d2c6..0707fb9551aa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -42,6 +42,8 @@ struct inet_skb_parm {
42#define IPSKB_XFRM_TRANSFORMED 4 42#define IPSKB_XFRM_TRANSFORMED 4
43#define IPSKB_FRAG_COMPLETE 8 43#define IPSKB_FRAG_COMPLETE 8
44#define IPSKB_REROUTED 16 44#define IPSKB_REROUTED 16
45
46 u16 frag_max_size;
45}; 47};
46 48
47static inline unsigned int ip_hdrlen(const struct sk_buff *skb) 49static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8d07c973409c..fa6a12c51066 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -523,6 +523,10 @@ found:
523 if (offset == 0) 523 if (offset == 0)
524 qp->q.last_in |= INET_FRAG_FIRST_IN; 524 qp->q.last_in |= INET_FRAG_FIRST_IN;
525 525
526 if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
527 skb->len + ihl > qp->q.max_size)
528 qp->q.max_size = skb->len + ihl;
529
526 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 530 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
527 qp->q.meat == qp->q.len) 531 qp->q.meat == qp->q.len)
528 return ip_frag_reasm(qp, prev, dev); 532 return ip_frag_reasm(qp, prev, dev);
@@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
646 head->next = NULL; 650 head->next = NULL;
647 head->dev = dev; 651 head->dev = dev;
648 head->tstamp = qp->q.stamp; 652 head->tstamp = qp->q.stamp;
653 IPCB(head)->frag_max_size = qp->q.max_size;
649 654
650 iph = ip_hdr(head); 655 iph = ip_hdr(head);
651 iph->frag_off = 0; 656 /* max_size != 0 implies at least one fragment had IP_DF set */
657 iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
652 iph->tot_len = htons(len); 658 iph->tot_len = htons(len);
653 iph->tos |= ecn; 659 iph->tos |= ecn;
654 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 660 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c196d749daf2..a5beab1dc958 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
467 467
468 iph = ip_hdr(skb); 468 iph = ip_hdr(skb);
469 469
470 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { 470 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
471 (IPCB(skb)->frag_max_size &&
472 IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) {
471 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 473 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
472 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 474 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
473 htonl(ip_skb_dst_mtu(skb))); 475 htonl(ip_skb_dst_mtu(skb)));