aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-06-15 17:54:11 -0400
committer David S. Miller <davem@davemloft.net> 2012-06-15 17:54:11 -0400
commit 81aded24675ebda5de8a68843250ad15584ac38a (patch)
tree 84f7bd5cf86cf010394de92efd5e4c5b636b3d20 /net
parent 36393395536064e483b73d173f6afc103eadfbc4 (diff)
ipv6: Handle PMTU in ICMP error handlers.
One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order whereas ipv4 passes it around in host byte order.

Like the ipv4 side, we have two helper functions. One for when we have a socket context and one for when we do not.

ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender.

This patch allows us to get rid of rt6_do_pmtu_disc(). That function handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route.

In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there.

Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing since this is the context where we have the most accurate context in which to make a PMTU update, namely we have a fully connected socket and associated cached socket route.

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/dccp/ipv6.c 2
-rw-r--r-- net/ipv6/ah6.c 3
-rw-r--r-- net/ipv6/esp6.c 2
-rw-r--r-- net/ipv6/icmp.c 6
-rw-r--r-- net/ipv6/ipcomp6.c 2
-rw-r--r-- net/ipv6/raw.c 5
-rw-r--r-- net/ipv6/route.c 143
-rw-r--r-- net/ipv6/tcp_ipv6.c 2
-rw-r--r-- net/ipv6/udp.c 3
9 files changed, 50 insertions, 118 deletions
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fa9512d86f3b..9991be083ad0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
165 } else 165 } else
166 dst_hold(dst); 166 dst_hold(dst);
167 167
168 dst->ops->update_pmtu(dst, ntohl(info));
169
168 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 170 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
169 dccp_sync_mss(sk, dst_mtu(dst)); 171 dccp_sync_mss(sk, dst_mtu(dst));
170 } /* else let the usual retransmit timer handle it */ 172 } /* else let the usual retransmit timer handle it */
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f1a4a2c28ed3..49d4d26bda88 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -35,6 +35,7 @@
35#include <linux/pfkeyv2.h> 35#include <linux/pfkeyv2.h>
36#include <linux/string.h> 36#include <linux/string.h>
37#include <linux/scatterlist.h> 37#include <linux/scatterlist.h>
38#include <net/ip6_route.h>
38#include <net/icmp.h> 39#include <net/icmp.h>
39#include <net/ipv6.h> 40#include <net/ipv6.h>
40#include <net/protocol.h> 41#include <net/protocol.h>
@@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
621 622
622 NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", 623 NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
623 ntohl(ah->spi), &iph->daddr); 624 ntohl(ah->spi), &iph->daddr);
624 625 ip6_update_pmtu(skb, net, info, 0, 0);
625 xfrm_state_put(x); 626 xfrm_state_put(x);
626} 627}
627 628
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index db1521fcda5b..89a615ba84f8 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -39,6 +39,7 @@
39#include <linux/random.h> 39#include <linux/random.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/spinlock.h> 41#include <linux/spinlock.h>
42#include <net/ip6_route.h>
42#include <net/icmp.h> 43#include <net/icmp.h>
43#include <net/ipv6.h> 44#include <net/ipv6.h>
44#include <net/protocol.h> 45#include <net/protocol.h>
@@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
442 return; 443 return;
443 pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", 444 pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n",
444 ntohl(esph->spi), &iph->daddr); 445 ntohl(esph->spi), &iph->daddr);
446 ip6_update_pmtu(skb, net, info, 0, 0);
445 xfrm_state_put(x); 447 xfrm_state_put(x);
446} 448}
447 449
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ed89bba745a1..5247d5c211f9 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
649 struct net_device *dev = skb->dev; 649 struct net_device *dev = skb->dev;
650 struct inet6_dev *idev = __in6_dev_get(dev); 650 struct inet6_dev *idev = __in6_dev_get(dev);
651 const struct in6_addr *saddr, *daddr; 651 const struct in6_addr *saddr, *daddr;
652 const struct ipv6hdr *orig_hdr;
653 struct icmp6hdr *hdr; 652 struct icmp6hdr *hdr;
654 u8 type; 653 u8 type;
655 654
@@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
661 XFRM_STATE_ICMP)) 660 XFRM_STATE_ICMP))
662 goto drop_no_count; 661 goto drop_no_count;
663 662
664 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) 663 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
665 goto drop_no_count; 664 goto drop_no_count;
666 665
667 nh = skb_network_offset(skb); 666 nh = skb_network_offset(skb);
@@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
722 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 721 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
723 goto discard_it; 722 goto discard_it;
724 hdr = icmp6_hdr(skb); 723 hdr = icmp6_hdr(skb);
725 orig_hdr = (struct ipv6hdr *) (hdr + 1);
726 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
727 ntohl(hdr->icmp6_mtu));
728 724
729 /* 725 /*
730 * Drop through to notify 726 * Drop through to notify
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5cb75bfe45b1..92832385a8ef 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -46,6 +46,7 @@
46#include <linux/list.h> 46#include <linux/list.h>
47#include <linux/vmalloc.h> 47#include <linux/vmalloc.h>
48#include <linux/rtnetlink.h> 48#include <linux/rtnetlink.h>
49#include <net/ip6_route.h>
49#include <net/icmp.h> 50#include <net/icmp.h>
50#include <net/ipv6.h> 51#include <net/ipv6.h>
51#include <net/protocol.h> 52#include <net/protocol.h>
@@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
74 75
75 pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", 76 pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n",
76 spi, &iph->daddr); 77 spi, &iph->daddr);
78 ip6_update_pmtu(skb, net, info, 0, 0);
77 xfrm_state_put(x); 79 xfrm_state_put(x);
78} 80}
79 81
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 93d69836fded..43b0042f15f4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
328 return; 328 return;
329 329
330 harderr = icmpv6_err_convert(type, code, &err); 330 harderr = icmpv6_err_convert(type, code, &err);
331 if (type == ICMPV6_PKT_TOOBIG) 331 if (type == ICMPV6_PKT_TOOBIG) {
332 ip6_sk_update_pmtu(skb, sk, info);
332 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); 333 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
333 334 }
334 if (np->recverr) { 335 if (np->recverr) {
335 u8 *payload = skb->data; 336 u8 *payload = skb->data;
336 if (!inet->hdrincl) 337 if (!inet->hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 58a3ec23da2f..0d41f68daff2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049{ 1049{
1050 struct rt6_info *rt6 = (struct rt6_info*)dst; 1050 struct rt6_info *rt6 = (struct rt6_info*)dst;
1051 1051
1052 dst_confirm(dst);
1052 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1053 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1054 struct net *net = dev_net(dst->dev);
1055
1053 rt6->rt6i_flags |= RTF_MODIFIED; 1056 rt6->rt6i_flags |= RTF_MODIFIED;
1054 if (mtu < IPV6_MIN_MTU) { 1057 if (mtu < IPV6_MIN_MTU) {
1055 u32 features = dst_metric(dst, RTAX_FEATURES); 1058 u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1058 dst_metric_set(dst, RTAX_FEATURES, features); 1061 dst_metric_set(dst, RTAX_FEATURES, features);
1059 } 1062 }
1060 dst_metric_set(dst, RTAX_MTU, mtu); 1063 dst_metric_set(dst, RTAX_MTU, mtu);
1064 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1061 } 1065 }
1062} 1066}
1063 1067
1068void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1069 int oif, __be32 mark)
1070{
1071 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1072 struct dst_entry *dst;
1073 struct flowi6 fl6;
1074
1075 memset(&fl6, 0, sizeof(fl6));
1076 fl6.flowi6_oif = oif;
1077 fl6.flowi6_mark = mark;
1078 fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
1079 fl6.daddr = iph->daddr;
1080 fl6.saddr = iph->saddr;
1081 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1082
1083 dst = ip6_route_output(net, NULL, &fl6);
1084 if (!dst->error)
1085 ip6_rt_update_pmtu(dst, ntohl(mtu));
1086 dst_release(dst);
1087}
1088EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1089
1090void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1091{
1092 ip6_update_pmtu(skb, sock_net(sk), mtu,
1093 sk->sk_bound_dev_if, sk->sk_mark);
1094}
1095EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1096
1064static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1097static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1065{ 1098{
1066 struct net_device *dev = dst->dev; 1099 struct net_device *dev = dst->dev;
@@ -1704,116 +1737,6 @@ out:
1704} 1737}
1705 1738
1706/* 1739/*
1707 * Handle ICMP "packet too big" messages
1708 * i.e. Path MTU discovery
1709 */
1710
1711static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1712 struct net *net, u32 pmtu, int ifindex)
1713{
1714 struct rt6_info *rt, *nrt;
1715 int allfrag = 0;
1716again:
1717 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1718 if (!rt)
1719 return;
1720
1721 if (rt6_check_expired(rt)) {
1722 ip6_del_rt(rt);
1723 goto again;
1724 }
1725
1726 if (pmtu >= dst_mtu(&rt->dst))
1727 goto out;
1728
1729 if (pmtu < IPV6_MIN_MTU) {
1730 /*
1731 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1732 * MTU (1280) and a fragment header should always be included
1733 * after a node receiving Too Big message reporting PMTU is
1734 * less than the IPv6 Minimum Link MTU.
1735 */
1736 pmtu = IPV6_MIN_MTU;
1737 allfrag = 1;
1738 }
1739
1740 /* New mtu received -> path was valid.
1741 They are sent only in response to data packets,
1742 so that this nexthop apparently is reachable. --ANK
1743 */
1744 dst_confirm(&rt->dst);
1745
1746 /* Host route. If it is static, it would be better
1747 not to override it, but add new one, so that
1748 when cache entry will expire old pmtu
1749 would return automatically.
1750 */
1751 if (rt->rt6i_flags & RTF_CACHE) {
1752 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1753 if (allfrag) {
1754 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1755 features |= RTAX_FEATURE_ALLFRAG;
1756 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1757 }
1758 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1759 rt->rt6i_flags |= RTF_MODIFIED;
1760 goto out;
1761 }
1762
1763 /* Network route.
1764 Two cases are possible:
1765 1. It is connected route. Action: COW
1766 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1767 */
1768 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1769 nrt = rt6_alloc_cow(rt, daddr, saddr);
1770 else
1771 nrt = rt6_alloc_clone(rt, daddr);
1772
1773 if (nrt) {
1774 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1775 if (allfrag) {
1776 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1777 features |= RTAX_FEATURE_ALLFRAG;
1778 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1779 }
1780
1781 /* According to RFC 1981, detecting PMTU increase shouldn't be
1782 * happened within 5 mins, the recommended timer is 10 mins.
1783 * Here this route expiration time is set to ip6_rt_mtu_expires
1784 * which is 10 mins. After 10 mins the decreased pmtu is expired
1785 * and detecting PMTU increase will be automatically happened.
1786 */
1787 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1788 nrt->rt6i_flags |= RTF_DYNAMIC;
1789 ip6_ins_rt(nrt);
1790 }
1791out:
1792 dst_release(&rt->dst);
1793}
1794
1795void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1796 struct net_device *dev, u32 pmtu)
1797{
1798 struct net *net = dev_net(dev);
1799
1800 /*
1801 * RFC 1981 states that a node "MUST reduce the size of the packets it
1802 * is sending along the path" that caused the Packet Too Big message.
1803 * Since it's not possible in the general case to determine which
1804 * interface was used to send the original packet, we update the MTU
1805 * on the interface that will be used to send future packets. We also
1806 * update the MTU on the interface that received the Packet Too Big in
1807 * case the original packet was forced out that interface with
1808 * SO_BINDTODEVICE or similar. This is the next best thing to the
1809 * correct behaviour, which would be to update the MTU on all
1810 * interfaces.
1811 */
1812 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1813 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1814}
1815
1816/*
1817 * Misc support functions 1740 * Misc support functions
1818 */ 1741 */
1819 1742
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f91b0bfd12d5..26a88623940b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
415 } else 415 } else
416 dst_hold(dst); 416 dst_hold(dst);
417 417
418 dst->ops->update_pmtu(dst, ntohl(info));
419
418 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 420 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
419 tcp_sync_mss(sk, dst_mtu(dst)); 421 tcp_sync_mss(sk, dst_mtu(dst));
420 tcp_simple_retransmit(sk); 422 tcp_simple_retransmit(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f05099fc5901..051ad481973f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
479 if (sk == NULL) 479 if (sk == NULL)
480 return; 480 return;
481 481
482 if (type == ICMPV6_PKT_TOOBIG)
483 ip6_sk_update_pmtu(skb, sk, info);
484
482 np = inet6_sk(sk); 485 np = inet6_sk(sk);
483 486
484 if (!icmpv6_err_convert(type, code, &err) && !np->recverr) 487 if (!icmpv6_err_convert(type, code, &err) && !np->recverr)