ipv6: Handle PMTU in ICMP error handlers.

One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order, whereas ipv4 passes it around in host byte order. Like the ipv4 side, we have two helper functions: one for when we have a socket context and one for when we do not. ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender. This patch allows us to get rid of rt6_do_pmtu_disc(). That function handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route. In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there. Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing, since this is where we have the most accurate context in which to make a PMTU update: a fully connected socket and its associated cached socket route. Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2012-06-15 17:54:11 -0400
committer: David S. Miller <davem@davemloft.net> 2012-06-15 17:54:11 -0400
commit: 81aded24675ebda5de8a68843250ad15584ac38a (patch)
tree: 84f7bd5cf86cf010394de92efd5e4c5b636b3d20 /net/ipv6/route.c
parent: 36393395536064e483b73d173f6afc103eadfbc4 (diff)
1 files changed, 33 insertions, 110 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 58a3ec23da2..0d41f68daff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
        struct rt6_info *rt6 = (struct rt6_info*)dst;
+        dst_confirm(dst);
        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+                struct net *net = dev_net(dst->dev);
                rt6->rt6i_flags |= RTF_MODIFIED;
                if (mtu < IPV6_MIN_MTU) {
                        u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
                        dst_metric_set(dst, RTAX_FEATURES, features);
                }
                dst_metric_set(dst, RTAX_MTU, mtu);
+                rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
        }
 }
+/**
+ * ip6_update_pmtu - apply a reported path MTU to the route for a packet
+ * @skb:  buffer whose ->data is read as the IPv6 header of the packet the
+ *        report refers to
+ * @net:  network namespace handed to the route lookup
+ * @mtu:  reported MTU, still in network byte order; it is converted with
+ *        ntohl() just before the route is updated
+ * @oif:  interface index stored in the flow key
+ * @mark: mark stored in the flow key; a plain u32, not a big-endian value
+ *
+ * A flow key is built from the header's addresses and flow-info bits, the
+ * output route is looked up, and unless the returned dst carries an error
+ * the host-order MTU is passed to ip6_rt_update_pmtu().  The dst returned
+ * by the lookup is released on every path.
+ *
+ * NOTE(review): the prototype in the header that declares this function
+ * must carry the same __be32/u32 annotations.
+ */
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+                     int oif, u32 mark)
+{
+        const struct ipv6hdr *iph = (const struct ipv6hdr *) skb->data;
+        struct dst_entry *dst;
+        struct flowi6 fl6;
+        memset(&fl6, 0, sizeof(fl6));
+        fl6.flowi6_oif = oif;
+        fl6.flowi6_mark = mark;
+        /* NOTE(review): presumably requests writable route metrics ahead
+         * of the update below -- confirm against the flag's definition.
+         */
+        fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
+        fl6.daddr = iph->daddr;
+        fl6.saddr = iph->saddr;
+        /* Mask the first 32-bit word of the header down to its flow-info bits. */
+        fl6.flowlabel = (*(const __be32 *) iph) & IPV6_FLOWINFO_MASK;
+        dst = ip6_route_output(net, NULL, &fl6);
+        /* An error dst is still a referenced object: skip the update but
+         * fall through to the release.
+         */
+        if (!dst->error)
+                ip6_rt_update_pmtu(dst, ntohl(mtu));
+        dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+        /* Socket flavour of ip6_update_pmtu(): the lookup parameters
+         * (sock_net(), bound device index, mark) all come from @sk and
+         * @mtu is forwarded untouched.
+         */
+        struct net *sk_ns = sock_net(sk);
+        int bound_if = sk->sk_bound_dev_if;
+
+        ip6_update_pmtu(skb, sk_ns, mtu, bound_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
        struct net_device *dev = dst->dev;
@@ -1704,116 +1737,6 @@ out:
 }
 /*
- *      Handle ICMP "packet too big" messages
- *      i.e. Path MTU discovery
- */
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
-                             struct net *net, u32 pmtu, int ifindex)
-{
-        struct rt6_info *rt, *nrt;
-        int allfrag = 0;
-again:
-        rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
-        if (!rt)
-                return;
-        if (rt6_check_expired(rt)) {
-                ip6_del_rt(rt);
-                goto again;
-        }
-        if (pmtu >= dst_mtu(&rt->dst))
-                goto out;
-        if (pmtu < IPV6_MIN_MTU) {
-                /*
-                 * According to RFC 2460, when a Too Big message reports a
-                 * PMTU below the IPv6 minimum link MTU (1280), the PMTU is
-                 * set to that minimum and a fragment header must be
-                 * included in every packet from then on.  allfrag records
-                 * the latter so that RTAX_FEATURE_ALLFRAG is set on the
-                 * route further down.
-                 */
-                pmtu = IPV6_MIN_MTU;
-                allfrag = 1;
-        }
-        /* A new MTU was received, so the path was valid: such reports
-           are sent only in response to data packets, so this nexthop
-           is apparently reachable. --ANK
-         */
-        dst_confirm(&rt->dst);
-        /* Host route. If it is static, it would be better not to
-           override it but to add a new one, so that the old PMTU
-           returns automatically when the cache entry expires.
-           As written, only an RTF_CACHE entry is updated in place:
-           MTU metric, optional ALLFRAG feature, expiry, RTF_MODIFIED.
-         */
-        if (rt->rt6i_flags & RTF_CACHE) {
-                dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
-                if (allfrag) {
-                        u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
-                        features |= RTAX_FEATURE_ALLFRAG;
-                        dst_metric_set(&rt->dst, RTAX_FEATURES, features);
-                }
-                rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-                rt->rt6i_flags |= RTF_MODIFIED;
-                goto out;
-        }
-        /* Network route.
-           Two cases are possible:
-           1. It is a connected route. Action: COW.
-           2. It is a gatewayed route or a NONEXTHOP route. Action: clone it.
-           The test below picks COW when the route has no neighbour and
-           RTF_NONEXTHOP is clear, and the clone otherwise.
-         */
-        if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
-                nrt = rt6_alloc_cow(rt, daddr, saddr);
-        else
-                nrt = rt6_alloc_clone(rt, daddr);
-        if (nrt) {
-                dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
-                if (allfrag) {
-                        u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
-                        features |= RTAX_FEATURE_ALLFRAG;
-                        dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
-                }
-                /* According to RFC 1981, an increase of the PMTU should
-                 * not be probed for within 5 minutes; the recommended
-                 * timer is 10 minutes.  The expiry of this route is
-                 * therefore set to ip6_rt_mtu_expires, which is 10
-                 * minutes.  Once it expires the decreased PMTU goes away
-                 * and a PMTU increase is detected automatically.
-                 */
-                rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-                nrt->rt6i_flags |= RTF_DYNAMIC;
-                ip6_ins_rt(nrt);
-        }
-out:
-        dst_release(&rt->dst);
-}
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
-                        struct net_device *dev, u32 pmtu)
-{
-        struct net *net = dev_net(dev);
-        /*
-         * RFC 1981 states that a node "MUST reduce the size of the packets it
-         * is sending along the path" that caused the Packet Too Big message.
-         * Since it's not possible in the general case to determine which
-         * interface was used to send the original packet, we update the MTU
-         * on the interface that will be used to send future packets. We also
-         * update the MTU on the interface that received the Packet Too Big in
-         * case the original packet was forced out that interface with
-         * SO_BINDTODEVICE or similar. This is the next best thing to the
-         * correct behaviour, which would be to update the MTU on all
-         * interfaces.
-         *
-         * Hence the two calls below, both in the namespace of dev: the
-         * first passes ifindex 0 (presumably "no interface constraint"
-         * for the lookup -- confirm against rt6_lookup()), the second
-         * passes dev->ifindex.
-         */
-        rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
-        rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
-}
-/*
 *      Misc support functions
 */
author	David S. Miller <davem@davemloft.net>	2012-06-15 17:54:11 -0400
committer	David S. Miller <davem@davemloft.net>	2012-06-15 17:54:11 -0400
commit	81aded24675ebda5de8a68843250ad15584ac38a (patch)
tree	84f7bd5cf86cf010394de92efd5e4c5b636b3d20 /net/ipv6/route.c
parent	36393395536064e483b73d173f6afc103eadfbc4 (diff)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58a3ec23da2..0d41f68daff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049	{	1049	{
1050	struct rt6_info *rt6 = (struct rt6_info*)dst;	1050	struct rt6_info *rt6 = (struct rt6_info*)dst;
1051		1051
		1052	dst_confirm(dst);
1052	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {	1053	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		1054	struct net *net = dev_net(dst->dev);
		1055
1053	rt6->rt6i_flags |= RTF_MODIFIED;	1056	rt6->rt6i_flags |= RTF_MODIFIED;
1054	if (mtu < IPV6_MIN_MTU) {	1057	if (mtu < IPV6_MIN_MTU) {
1055	u32 features = dst_metric(dst, RTAX_FEATURES);	1058	u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1058	dst_metric_set(dst, RTAX_FEATURES, features);	1061	dst_metric_set(dst, RTAX_FEATURES, features);
1059	}	1062	}
1060	dst_metric_set(dst, RTAX_MTU, mtu);	1063	dst_metric_set(dst, RTAX_MTU, mtu);
		1064	rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1061	}	1065	}
1062	}	1066	}
1063		1067
		1068	void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		1069	int oif, __be32 mark)
		1070	{
		1071	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
		1072	struct dst_entry *dst;
		1073	struct flowi6 fl6;
		1074
		1075	memset(&fl6, 0, sizeof(fl6));
		1076	fl6.flowi6_oif = oif;
		1077	fl6.flowi6_mark = mark;
		1078	fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
		1079	fl6.daddr = iph->daddr;
		1080	fl6.saddr = iph->saddr;
		1081	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
		1082
		1083	dst = ip6_route_output(net, NULL, &fl6);
		1084	if (!dst->error)
		1085	ip6_rt_update_pmtu(dst, ntohl(mtu));
		1086	dst_release(dst);
		1087	}
		1088	EXPORT_SYMBOL_GPL(ip6_update_pmtu);
		1089
		1090	void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
		1091	{
		1092	ip6_update_pmtu(skb, sock_net(sk), mtu,
		1093	sk->sk_bound_dev_if, sk->sk_mark);
		1094	}
		1095	EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
		1096
1064	static unsigned int ip6_default_advmss(const struct dst_entry *dst)	1097	static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1065	{	1098	{
1066	struct net_device *dev = dst->dev;	1099	struct net_device *dev = dst->dev;
@@ -1704,116 +1737,6 @@ out:
1704	}	1737	}
1705		1738
1706	/*	1739	/*
1707	* Handle ICMP "packet too big" messages
1708	* i.e. Path MTU discovery
1709	*/
1710
1711	static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1712	struct net *net, u32 pmtu, int ifindex)
1713	{
1714	struct rt6_info *rt, *nrt;
1715	int allfrag = 0;
1716	again:
1717	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1718	if (!rt)
1719	return;
1720
1721	if (rt6_check_expired(rt)) {
1722	ip6_del_rt(rt);
1723	goto again;
1724	}
1725
1726	if (pmtu >= dst_mtu(&rt->dst))
1727	goto out;
1728
1729	if (pmtu < IPV6_MIN_MTU) {
1730	/*
1731	* According to RFC2460, PMTU is set to the IPv6 Minimum Link
1732	* MTU (1280) and a fragment header should always be included
1733	* after a node receiving Too Big message reporting PMTU is
1734	* less than the IPv6 Minimum Link MTU.
1735	*/
1736	pmtu = IPV6_MIN_MTU;
1737	allfrag = 1;
1738	}
1739
1740	/* New mtu received -> path was valid.
1741	They are sent only in response to data packets,
1742	so that this nexthop apparently is reachable. --ANK
1743	*/
1744	dst_confirm(&rt->dst);
1745
1746	/* Host route. If it is static, it would be better
1747	not to override it, but add new one, so that
1748	when cache entry will expire old pmtu
1749	would return automatically.
1750	*/
1751	if (rt->rt6i_flags & RTF_CACHE) {
1752	dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1753	if (allfrag) {
1754	u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1755	features |= RTAX_FEATURE_ALLFRAG;
1756	dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1757	}
1758	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1759	rt->rt6i_flags |= RTF_MODIFIED;
1760	goto out;
1761	}
1762
1763	/* Network route.
1764	Two cases are possible:
1765	1. It is connected route. Action: COW
1766	2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1767	*/
1768	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1769	nrt = rt6_alloc_cow(rt, daddr, saddr);
1770	else
1771	nrt = rt6_alloc_clone(rt, daddr);
1772
1773	if (nrt) {
1774	dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1775	if (allfrag) {
1776	u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1777	features |= RTAX_FEATURE_ALLFRAG;
1778	dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1779	}
1780
1781	/* According to RFC 1981, detecting PMTU increase shouldn't be
1782	* happened within 5 mins, the recommended timer is 10 mins.
1783	* Here this route expiration time is set to ip6_rt_mtu_expires
1784	* which is 10 mins. After 10 mins the decreased pmtu is expired
1785	* and detecting PMTU increase will be automatically happened.
1786	*/
1787	rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1788	nrt->rt6i_flags |= RTF_DYNAMIC;
1789	ip6_ins_rt(nrt);
1790	}
1791	out:
1792	dst_release(&rt->dst);
1793	}
1794
1795	void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1796	struct net_device *dev, u32 pmtu)
1797	{
1798	struct net *net = dev_net(dev);
1799
1800	/*
1801	* RFC 1981 states that a node "MUST reduce the size of the packets it
1802	* is sending along the path" that caused the Packet Too Big message.
1803	* Since it's not possible in the general case to determine which
1804	* interface was used to send the original packet, we update the MTU
1805	* on the interface that will be used to send future packets. We also
1806	* update the MTU on the interface that received the Packet Too Big in
1807	* case the original packet was forced out that interface with
1808	* SO_BINDTODEVICE or similar. This is the next best thing to the
1809	* correct behaviour, which would be to update the MTU on all
1810	* interfaces.
1811	*/
1812	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1813	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1814	}
1815
1816	/*
1817	* Misc support functions	1740	* Misc support functions
1818	*/	1741	*/
1819		1742