diff options
author | David S. Miller <davem@davemloft.net> | 2012-06-15 17:54:11 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-06-15 17:54:11 -0400 |
commit | 81aded24675ebda5de8a68843250ad15584ac38a (patch) | |
tree | 84f7bd5cf86cf010394de92efd5e4c5b636b3d20 /net/ipv6/route.c | |
parent | 36393395536064e483b73d173f6afc103eadfbc4 (diff) |
ipv6: Handle PMTU in ICMP error handlers.
One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts
to handle the error pass the 32-bit info cookie in network byte order
whereas ipv4 passes it around in host byte order.
Like the ipv4 side, we have two helper functions: ip6_sk_update_pmtu()
for when we have a socket context, and ip6_update_pmtu() for when we do not.
ip6ip6 tunnels are not handled here, because they handle PMTU events
by essentially relaying another ICMP packet-too-big message back to
the original sender.
This patch allows us to get rid of rt6_do_pmtu_disc(). That function
handles all kinds of situations that simply cannot happen when we do the
PMTU update directly using a fully resolved route.
In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very
likely be removed or changed into a BUG_ON() check. We should never
have a prefixed ipv6 route when we get there.
Another piece of strange history here is that TCP and DCCP, unlike in
ipv4, never invoke the update_pmtu() method from their ICMP error
handlers. This is incredibly astonishing, since that is the context
where we have the most accurate information with which to make a PMTU
update: namely, a fully connected socket and its associated cached
socket route.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 143 |
1 files changed, 33 insertions, 110 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58a3ec23da2..0d41f68daff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1049 | { | 1049 | { |
1050 | struct rt6_info *rt6 = (struct rt6_info*)dst; | 1050 | struct rt6_info *rt6 = (struct rt6_info*)dst; |
1051 | 1051 | ||
1052 | dst_confirm(dst); | ||
1052 | if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { | 1053 | if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { |
1054 | struct net *net = dev_net(dst->dev); | ||
1055 | |||
1053 | rt6->rt6i_flags |= RTF_MODIFIED; | 1056 | rt6->rt6i_flags |= RTF_MODIFIED; |
1054 | if (mtu < IPV6_MIN_MTU) { | 1057 | if (mtu < IPV6_MIN_MTU) { |
1055 | u32 features = dst_metric(dst, RTAX_FEATURES); | 1058 | u32 features = dst_metric(dst, RTAX_FEATURES); |
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1058 | dst_metric_set(dst, RTAX_FEATURES, features); | 1061 | dst_metric_set(dst, RTAX_FEATURES, features); |
1059 | } | 1062 | } |
1060 | dst_metric_set(dst, RTAX_MTU, mtu); | 1063 | dst_metric_set(dst, RTAX_MTU, mtu); |
1064 | rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); | ||
1061 | } | 1065 | } |
1062 | } | 1066 | } |
1063 | 1067 | ||
1068 | void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, | ||
1069 | int oif, __be32 mark) | ||
1070 | { | ||
1071 | const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; | ||
1072 | struct dst_entry *dst; | ||
1073 | struct flowi6 fl6; | ||
1074 | |||
1075 | memset(&fl6, 0, sizeof(fl6)); | ||
1076 | fl6.flowi6_oif = oif; | ||
1077 | fl6.flowi6_mark = mark; | ||
1078 | fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; | ||
1079 | fl6.daddr = iph->daddr; | ||
1080 | fl6.saddr = iph->saddr; | ||
1081 | fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; | ||
1082 | |||
1083 | dst = ip6_route_output(net, NULL, &fl6); | ||
1084 | if (!dst->error) | ||
1085 | ip6_rt_update_pmtu(dst, ntohl(mtu)); | ||
1086 | dst_release(dst); | ||
1087 | } | ||
1088 | EXPORT_SYMBOL_GPL(ip6_update_pmtu); | ||
1089 | |||
1090 | void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) | ||
1091 | { | ||
1092 | ip6_update_pmtu(skb, sock_net(sk), mtu, | ||
1093 | sk->sk_bound_dev_if, sk->sk_mark); | ||
1094 | } | ||
1095 | EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); | ||
1096 | |||
1064 | static unsigned int ip6_default_advmss(const struct dst_entry *dst) | 1097 | static unsigned int ip6_default_advmss(const struct dst_entry *dst) |
1065 | { | 1098 | { |
1066 | struct net_device *dev = dst->dev; | 1099 | struct net_device *dev = dst->dev; |
@@ -1704,116 +1737,6 @@ out: | |||
1704 | } | 1737 | } |
1705 | 1738 | ||
1706 | /* | 1739 | /* |
1707 | * Handle ICMP "packet too big" messages | ||
1708 | * i.e. Path MTU discovery | ||
1709 | */ | ||
1710 | |||
1711 | static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, | ||
1712 | struct net *net, u32 pmtu, int ifindex) | ||
1713 | { | ||
1714 | struct rt6_info *rt, *nrt; | ||
1715 | int allfrag = 0; | ||
1716 | again: | ||
1717 | rt = rt6_lookup(net, daddr, saddr, ifindex, 0); | ||
1718 | if (!rt) | ||
1719 | return; | ||
1720 | |||
1721 | if (rt6_check_expired(rt)) { | ||
1722 | ip6_del_rt(rt); | ||
1723 | goto again; | ||
1724 | } | ||
1725 | |||
1726 | if (pmtu >= dst_mtu(&rt->dst)) | ||
1727 | goto out; | ||
1728 | |||
1729 | if (pmtu < IPV6_MIN_MTU) { | ||
1730 | /* | ||
1731 | * According to RFC2460, PMTU is set to the IPv6 Minimum Link | ||
1732 | * MTU (1280) and a fragment header should always be included | ||
1733 | * after a node receiving Too Big message reporting PMTU is | ||
1734 | * less than the IPv6 Minimum Link MTU. | ||
1735 | */ | ||
1736 | pmtu = IPV6_MIN_MTU; | ||
1737 | allfrag = 1; | ||
1738 | } | ||
1739 | |||
1740 | /* New mtu received -> path was valid. | ||
1741 | They are sent only in response to data packets, | ||
1742 | so that this nexthop apparently is reachable. --ANK | ||
1743 | */ | ||
1744 | dst_confirm(&rt->dst); | ||
1745 | |||
1746 | /* Host route. If it is static, it would be better | ||
1747 | not to override it, but add new one, so that | ||
1748 | when cache entry will expire old pmtu | ||
1749 | would return automatically. | ||
1750 | */ | ||
1751 | if (rt->rt6i_flags & RTF_CACHE) { | ||
1752 | dst_metric_set(&rt->dst, RTAX_MTU, pmtu); | ||
1753 | if (allfrag) { | ||
1754 | u32 features = dst_metric(&rt->dst, RTAX_FEATURES); | ||
1755 | features |= RTAX_FEATURE_ALLFRAG; | ||
1756 | dst_metric_set(&rt->dst, RTAX_FEATURES, features); | ||
1757 | } | ||
1758 | rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); | ||
1759 | rt->rt6i_flags |= RTF_MODIFIED; | ||
1760 | goto out; | ||
1761 | } | ||
1762 | |||
1763 | /* Network route. | ||
1764 | Two cases are possible: | ||
1765 | 1. It is connected route. Action: COW | ||
1766 | 2. It is gatewayed route or NONEXTHOP route. Action: clone it. | ||
1767 | */ | ||
1768 | if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) | ||
1769 | nrt = rt6_alloc_cow(rt, daddr, saddr); | ||
1770 | else | ||
1771 | nrt = rt6_alloc_clone(rt, daddr); | ||
1772 | |||
1773 | if (nrt) { | ||
1774 | dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); | ||
1775 | if (allfrag) { | ||
1776 | u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); | ||
1777 | features |= RTAX_FEATURE_ALLFRAG; | ||
1778 | dst_metric_set(&nrt->dst, RTAX_FEATURES, features); | ||
1779 | } | ||
1780 | |||
1781 | /* According to RFC 1981, detecting PMTU increase shouldn't be | ||
1782 | * happened within 5 mins, the recommended timer is 10 mins. | ||
1783 | * Here this route expiration time is set to ip6_rt_mtu_expires | ||
1784 | * which is 10 mins. After 10 mins the decreased pmtu is expired | ||
1785 | * and detecting PMTU increase will be automatically happened. | ||
1786 | */ | ||
1787 | rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); | ||
1788 | nrt->rt6i_flags |= RTF_DYNAMIC; | ||
1789 | ip6_ins_rt(nrt); | ||
1790 | } | ||
1791 | out: | ||
1792 | dst_release(&rt->dst); | ||
1793 | } | ||
1794 | |||
1795 | void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, | ||
1796 | struct net_device *dev, u32 pmtu) | ||
1797 | { | ||
1798 | struct net *net = dev_net(dev); | ||
1799 | |||
1800 | /* | ||
1801 | * RFC 1981 states that a node "MUST reduce the size of the packets it | ||
1802 | * is sending along the path" that caused the Packet Too Big message. | ||
1803 | * Since it's not possible in the general case to determine which | ||
1804 | * interface was used to send the original packet, we update the MTU | ||
1805 | * on the interface that will be used to send future packets. We also | ||
1806 | * update the MTU on the interface that received the Packet Too Big in | ||
1807 | * case the original packet was forced out that interface with | ||
1808 | * SO_BINDTODEVICE or similar. This is the next best thing to the | ||
1809 | * correct behaviour, which would be to update the MTU on all | ||
1810 | * interfaces. | ||
1811 | */ | ||
1812 | rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); | ||
1813 | rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); | ||
1814 | } | ||
1815 | |||
1816 | /* | ||
1817 | * Misc support functions | 1740 | * Misc support functions |
1818 | */ | 1741 | */ |
1819 | 1742 | ||