diff options
author | Martin KaFai Lau <kafai@fb.com> | 2015-05-22 23:56:00 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-25 13:25:33 -0400 |
commit | 45e4fd26683c9a5f88600d91b08a484f7f09226a (patch) | |
tree | 9393590b3d79a26cf18cd11ac2b2d7f7c34a61e5 /net/ipv6 | |
parent | 8b9df2657704dd313333a79497dde429f9190caa (diff) |
ipv6: Only create RTF_CACHE routes after encountering pmtu exception
This patch creates RTF_CACHE routes only after encountering a pmtu
exception.
After ip6_rt_update_pmtu() has inserted the RTF_CACHE route into the fib6
tree, rt->rt6i_node->fn_sernum is bumped, which causes the next
ip6_dst_check() to fail and triggers a relookup.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/ip6_fib.c | 1 | ||||
-rw-r--r-- | net/ipv6/route.c | 100 |
2 files changed, 52 insertions, 49 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bde57b113009..83341b3a248d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -738,6 +738,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
738 | rt6_clean_expires(iter); | 738 | rt6_clean_expires(iter); |
739 | else | 739 | else |
740 | rt6_set_expires(iter, rt->dst.expires); | 740 | rt6_set_expires(iter, rt->dst.expires); |
741 | iter->rt6i_pmtu = rt->rt6i_pmtu; | ||
741 | return -EEXIST; | 742 | return -EEXIST; |
742 | } | 743 | } |
743 | /* If we have the same destination and the same metric, | 744 | /* If we have the same destination and the same metric, |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f199d6357b31..e7ae2430dfed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -873,16 +873,13 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, | |||
873 | struct flowi6 *fl6, int flags) | 873 | struct flowi6 *fl6, int flags) |
874 | { | 874 | { |
875 | struct fib6_node *fn, *saved_fn; | 875 | struct fib6_node *fn, *saved_fn; |
876 | struct rt6_info *rt, *nrt; | 876 | struct rt6_info *rt; |
877 | int strict = 0; | 877 | int strict = 0; |
878 | int attempts = 3; | ||
879 | int err; | ||
880 | 878 | ||
881 | strict |= flags & RT6_LOOKUP_F_IFACE; | 879 | strict |= flags & RT6_LOOKUP_F_IFACE; |
882 | if (net->ipv6.devconf_all->forwarding == 0) | 880 | if (net->ipv6.devconf_all->forwarding == 0) |
883 | strict |= RT6_LOOKUP_F_REACHABLE; | 881 | strict |= RT6_LOOKUP_F_REACHABLE; |
884 | 882 | ||
885 | redo_fib6_lookup_lock: | ||
886 | read_lock_bh(&table->tb6_lock); | 883 | read_lock_bh(&table->tb6_lock); |
887 | 884 | ||
888 | fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); | 885 | fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); |
@@ -901,46 +898,12 @@ redo_rt6_select: | |||
901 | strict &= ~RT6_LOOKUP_F_REACHABLE; | 898 | strict &= ~RT6_LOOKUP_F_REACHABLE; |
902 | fn = saved_fn; | 899 | fn = saved_fn; |
903 | goto redo_rt6_select; | 900 | goto redo_rt6_select; |
904 | } else { | ||
905 | dst_hold(&rt->dst); | ||
906 | read_unlock_bh(&table->tb6_lock); | ||
907 | goto out2; | ||
908 | } | 901 | } |
909 | } | 902 | } |
910 | 903 | ||
911 | dst_hold(&rt->dst); | 904 | dst_hold(&rt->dst); |
912 | read_unlock_bh(&table->tb6_lock); | 905 | read_unlock_bh(&table->tb6_lock); |
913 | 906 | ||
914 | if (rt->rt6i_flags & RTF_CACHE) | ||
915 | goto out2; | ||
916 | |||
917 | if (!rt6_is_gw_or_nonexthop(rt) || | ||
918 | !(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL)) | ||
919 | nrt = ip6_rt_cache_alloc(rt, &fl6->daddr, &fl6->saddr); | ||
920 | else | ||
921 | goto out2; | ||
922 | |||
923 | ip6_rt_put(rt); | ||
924 | rt = nrt ? : net->ipv6.ip6_null_entry; | ||
925 | |||
926 | dst_hold(&rt->dst); | ||
927 | if (nrt) { | ||
928 | err = ip6_ins_rt(nrt); | ||
929 | if (!err) | ||
930 | goto out2; | ||
931 | } | ||
932 | |||
933 | if (--attempts <= 0) | ||
934 | goto out2; | ||
935 | |||
936 | /* | ||
937 | * Race condition! In the gap, when table->tb6_lock was | ||
938 | * released someone could insert this route. Relookup. | ||
939 | */ | ||
940 | ip6_rt_put(rt); | ||
941 | goto redo_fib6_lookup_lock; | ||
942 | |||
943 | out2: | ||
944 | rt6_dst_from_metrics_check(rt); | 907 | rt6_dst_from_metrics_check(rt); |
945 | rt->dst.lastuse = jiffies; | 908 | rt->dst.lastuse = jiffies; |
946 | rt->dst.__use++; | 909 | rt->dst.__use++; |
@@ -1113,24 +1076,63 @@ static void ip6_link_failure(struct sk_buff *skb) | |||
1113 | } | 1076 | } |
1114 | } | 1077 | } |
1115 | 1078 | ||
1116 | static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | 1079 | static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) |
1117 | struct sk_buff *skb, u32 mtu) | 1080 | { |
1081 | struct net *net = dev_net(rt->dst.dev); | ||
1082 | |||
1083 | rt->rt6i_flags |= RTF_MODIFIED; | ||
1084 | rt->rt6i_pmtu = mtu; | ||
1085 | rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); | ||
1086 | } | ||
1087 | |||
1088 | static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, | ||
1089 | const struct ipv6hdr *iph, u32 mtu) | ||
1118 | { | 1090 | { |
1119 | struct rt6_info *rt6 = (struct rt6_info *)dst; | 1091 | struct rt6_info *rt6 = (struct rt6_info *)dst; |
1120 | 1092 | ||
1121 | dst_confirm(dst); | 1093 | if (rt6->rt6i_flags & RTF_LOCAL) |
1122 | if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { | 1094 | return; |
1123 | struct net *net = dev_net(dst->dev); | ||
1124 | 1095 | ||
1125 | rt6->rt6i_flags |= RTF_MODIFIED; | 1096 | dst_confirm(dst); |
1126 | if (mtu < IPV6_MIN_MTU) | 1097 | mtu = max_t(u32, mtu, IPV6_MIN_MTU); |
1127 | mtu = IPV6_MIN_MTU; | 1098 | if (mtu >= dst_mtu(dst)) |
1099 | return; | ||
1128 | 1100 | ||
1129 | rt6->rt6i_pmtu = mtu; | 1101 | if (rt6->rt6i_flags & RTF_CACHE) { |
1130 | rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); | 1102 | rt6_do_update_pmtu(rt6, mtu); |
1103 | } else { | ||
1104 | const struct in6_addr *daddr, *saddr; | ||
1105 | struct rt6_info *nrt6; | ||
1106 | |||
1107 | if (iph) { | ||
1108 | daddr = &iph->daddr; | ||
1109 | saddr = &iph->saddr; | ||
1110 | } else if (sk) { | ||
1111 | daddr = &sk->sk_v6_daddr; | ||
1112 | saddr = &inet6_sk(sk)->saddr; | ||
1113 | } else { | ||
1114 | return; | ||
1115 | } | ||
1116 | nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); | ||
1117 | if (nrt6) { | ||
1118 | rt6_do_update_pmtu(nrt6, mtu); | ||
1119 | |||
1120 | /* ip6_ins_rt(nrt6) will bump the | ||
1121 | * rt6->rt6i_node->fn_sernum | ||
1122 | * which will fail the next rt6_check() and | ||
1123 | * invalidate the sk->sk_dst_cache. | ||
1124 | */ | ||
1125 | ip6_ins_rt(nrt6); | ||
1126 | } | ||
1131 | } | 1127 | } |
1132 | } | 1128 | } |
1133 | 1129 | ||
1130 | static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | ||
1131 | struct sk_buff *skb, u32 mtu) | ||
1132 | { | ||
1133 | __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); | ||
1134 | } | ||
1135 | |||
1134 | void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, | 1136 | void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, |
1135 | int oif, u32 mark) | 1137 | int oif, u32 mark) |
1136 | { | 1138 | { |
@@ -1147,7 +1149,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, | |||
1147 | 1149 | ||
1148 | dst = ip6_route_output(net, NULL, &fl6); | 1150 | dst = ip6_route_output(net, NULL, &fl6); |
1149 | if (!dst->error) | 1151 | if (!dst->error) |
1150 | ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); | 1152 | __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); |
1151 | dst_release(dst); | 1153 | dst_release(dst); |
1152 | } | 1154 | } |
1153 | EXPORT_SYMBOL_GPL(ip6_update_pmtu); | 1155 | EXPORT_SYMBOL_GPL(ip6_update_pmtu); |