aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-06-15 17:54:11 -0400
committer David S. Miller <davem@davemloft.net> 2012-06-15 17:54:11 -0400
commit 81aded24675ebda5de8a68843250ad15584ac38a (patch)
tree 84f7bd5cf86cf010394de92efd5e4c5b636b3d20 /net/ipv6/route.c
parent 36393395536064e483b73d173f6afc103eadfbc4 (diff)
ipv6: Handle PMTU in ICMP error handlers.
One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order whereas ipv4 passes it around in host byte order. Like the ipv4 side, we have two helper functions. One for when we have a socket context and one for when we do not. ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender. This patch allows us to get rid of rt6_do_pmtu_disc(). It handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route. In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there. Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing since this is the context where we have the most accurate context in which to make a PMTU update, namely we have a fully connected socket and associated cached socket route. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- net/ipv6/route.c 143
1 files changed, 33 insertions, 110 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 58a3ec23da2..0d41f68daff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049{ 1049{
1050 struct rt6_info *rt6 = (struct rt6_info*)dst; 1050 struct rt6_info *rt6 = (struct rt6_info*)dst;
1051 1051
1052 dst_confirm(dst);
1052 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1053 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1054 struct net *net = dev_net(dst->dev);
1055
1053 rt6->rt6i_flags |= RTF_MODIFIED; 1056 rt6->rt6i_flags |= RTF_MODIFIED;
1054 if (mtu < IPV6_MIN_MTU) { 1057 if (mtu < IPV6_MIN_MTU) {
1055 u32 features = dst_metric(dst, RTAX_FEATURES); 1058 u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1058 dst_metric_set(dst, RTAX_FEATURES, features); 1061 dst_metric_set(dst, RTAX_FEATURES, features);
1059 } 1062 }
1060 dst_metric_set(dst, RTAX_MTU, mtu); 1063 dst_metric_set(dst, RTAX_MTU, mtu);
1064 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1061 } 1065 }
1062} 1066}
1063 1067
1068void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1069 int oif, __be32 mark)
1070{
1071 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1072 struct dst_entry *dst;
1073 struct flowi6 fl6;
1074
1075 memset(&fl6, 0, sizeof(fl6));
1076 fl6.flowi6_oif = oif;
1077 fl6.flowi6_mark = mark;
1078 fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
1079 fl6.daddr = iph->daddr;
1080 fl6.saddr = iph->saddr;
1081 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1082
1083 dst = ip6_route_output(net, NULL, &fl6);
1084 if (!dst->error)
1085 ip6_rt_update_pmtu(dst, ntohl(mtu));
1086 dst_release(dst);
1087}
1088EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1089
1090void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1091{
1092 ip6_update_pmtu(skb, sock_net(sk), mtu,
1093 sk->sk_bound_dev_if, sk->sk_mark);
1094}
1095EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1096
1064static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1097static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1065{ 1098{
1066 struct net_device *dev = dst->dev; 1099 struct net_device *dev = dst->dev;
@@ -1704,116 +1737,6 @@ out:
1704} 1737}
1705 1738
1706/* 1739/*
1707 * Handle ICMP "packet too big" messages
1708 * i.e. Path MTU discovery
1709 */
1710
1711static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1712 struct net *net, u32 pmtu, int ifindex)
1713{
1714 struct rt6_info *rt, *nrt;
1715 int allfrag = 0;
1716again:
1717 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1718 if (!rt)
1719 return;
1720
1721 if (rt6_check_expired(rt)) {
1722 ip6_del_rt(rt);
1723 goto again;
1724 }
1725
1726 if (pmtu >= dst_mtu(&rt->dst))
1727 goto out;
1728
1729 if (pmtu < IPV6_MIN_MTU) {
1730 /*
1731 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1732 * MTU (1280) and a fragment header should always be included
1733 * after a node receiving Too Big message reporting PMTU is
1734 * less than the IPv6 Minimum Link MTU.
1735 */
1736 pmtu = IPV6_MIN_MTU;
1737 allfrag = 1;
1738 }
1739
1740 /* New mtu received -> path was valid.
1741 They are sent only in response to data packets,
1742 so that this nexthop apparently is reachable. --ANK
1743 */
1744 dst_confirm(&rt->dst);
1745
1746 /* Host route. If it is static, it would be better
1747 not to override it, but add new one, so that
1748 when cache entry will expire old pmtu
1749 would return automatically.
1750 */
1751 if (rt->rt6i_flags & RTF_CACHE) {
1752 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1753 if (allfrag) {
1754 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1755 features |= RTAX_FEATURE_ALLFRAG;
1756 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1757 }
1758 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1759 rt->rt6i_flags |= RTF_MODIFIED;
1760 goto out;
1761 }
1762
1763 /* Network route.
1764 Two cases are possible:
1765 1. It is connected route. Action: COW
1766 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1767 */
1768 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1769 nrt = rt6_alloc_cow(rt, daddr, saddr);
1770 else
1771 nrt = rt6_alloc_clone(rt, daddr);
1772
1773 if (nrt) {
1774 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1775 if (allfrag) {
1776 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1777 features |= RTAX_FEATURE_ALLFRAG;
1778 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1779 }
1780
1781 /* According to RFC 1981, detecting PMTU increase shouldn't be
1782 * happened within 5 mins, the recommended timer is 10 mins.
1783 * Here this route expiration time is set to ip6_rt_mtu_expires
1784 * which is 10 mins. After 10 mins the decreased pmtu is expired
1785 * and detecting PMTU increase will be automatically happened.
1786 */
1787 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1788 nrt->rt6i_flags |= RTF_DYNAMIC;
1789 ip6_ins_rt(nrt);
1790 }
1791out:
1792 dst_release(&rt->dst);
1793}
1794
1795void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1796 struct net_device *dev, u32 pmtu)
1797{
1798 struct net *net = dev_net(dev);
1799
1800 /*
1801 * RFC 1981 states that a node "MUST reduce the size of the packets it
1802 * is sending along the path" that caused the Packet Too Big message.
1803 * Since it's not possible in the general case to determine which
1804 * interface was used to send the original packet, we update the MTU
1805 * on the interface that will be used to send future packets. We also
1806 * update the MTU on the interface that received the Packet Too Big in
1807 * case the original packet was forced out that interface with
1808 * SO_BINDTODEVICE or similar. This is the next best thing to the
1809 * correct behaviour, which would be to update the MTU on all
1810 * interfaces.
1811 */
1812 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1813 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1814}
1815
1816/*
1817 * Misc support functions 1740 * Misc support functions
1818 */ 1741 */
1819 1742