aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c538
1 files changed, 264 insertions, 274 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index becb048d18d4..cf02cb97bbdd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,7 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops);
78static int ip6_pkt_discard(struct sk_buff *skb); 78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb); 79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb); 80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
82 85
83#ifdef CONFIG_IPV6_ROUTE_INFO 86#ifdef CONFIG_IPV6_ROUTE_INFO
84static struct rt6_info *rt6_add_route_info(struct net *net, 87static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -99,10 +102,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
99 if (!(rt->dst.flags & DST_HOST)) 102 if (!(rt->dst.flags & DST_HOST))
100 return NULL; 103 return NULL;
101 104
102 if (!rt->rt6i_peer) 105 peer = rt6_get_peer_create(rt);
103 rt6_bind_peer(rt, 1);
104
105 peer = rt->rt6i_peer;
106 if (peer) { 106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old); 107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new; 108 unsigned long prev, new;
@@ -123,21 +123,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
123 return p; 123 return p;
124} 124}
125 125
126static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) 126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
127{ 129{
128 struct in6_addr *p = &rt->rt6i_gateway; 130 struct in6_addr *p = &rt->rt6i_gateway;
129 131
130 if (!ipv6_addr_any(p)) 132 if (!ipv6_addr_any(p))
131 return (const void *) p; 133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
132 return daddr; 136 return daddr;
133} 137}
134 138
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) 139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
136{ 142{
137 struct rt6_info *rt = (struct rt6_info *) dst; 143 struct rt6_info *rt = (struct rt6_info *) dst;
138 struct neighbour *n; 144 struct neighbour *n;
139 145
140 daddr = choose_neigh_daddr(rt, daddr); 146 daddr = choose_neigh_daddr(rt, skb, daddr);
141 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
142 if (n) 148 if (n)
143 return n; 149 return n;
@@ -152,7 +158,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
152 if (IS_ERR(n)) 158 if (IS_ERR(n))
153 return PTR_ERR(n); 159 return PTR_ERR(n);
154 } 160 }
155 dst_set_neighbour(&rt->dst, n); 161 rt->n = n;
156 162
157 return 0; 163 return 0;
158} 164}
@@ -171,6 +177,7 @@ static struct dst_ops ip6_dst_ops_template = {
171 .negative_advice = ip6_negative_advice, 177 .negative_advice = ip6_negative_advice,
172 .link_failure = ip6_link_failure, 178 .link_failure = ip6_link_failure,
173 .update_pmtu = ip6_rt_update_pmtu, 179 .update_pmtu = ip6_rt_update_pmtu,
180 .redirect = rt6_do_redirect,
174 .local_out = __ip6_local_out, 181 .local_out = __ip6_local_out,
175 .neigh_lookup = ip6_neigh_lookup, 182 .neigh_lookup = ip6_neigh_lookup,
176}; 183};
@@ -182,7 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
182 return mtu ? : dst->dev->mtu; 189 return mtu ? : dst->dev->mtu;
183} 190}
184 191
185static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
194{
195}
196
197static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198 struct sk_buff *skb)
186{ 199{
187} 200}
188 201
@@ -200,6 +213,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
200 .mtu = ip6_blackhole_mtu, 213 .mtu = ip6_blackhole_mtu,
201 .default_advmss = ip6_default_advmss, 214 .default_advmss = ip6_default_advmss,
202 .update_pmtu = ip6_rt_blackhole_update_pmtu, 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
216 .redirect = ip6_rt_blackhole_redirect,
203 .cow_metrics = ip6_rt_blackhole_cow_metrics, 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
204 .neigh_lookup = ip6_neigh_lookup, 218 .neigh_lookup = ip6_neigh_lookup,
205}; 219};
@@ -261,16 +275,20 @@ static struct rt6_info ip6_blk_hole_entry_template = {
261#endif 275#endif
262 276
263/* allocate dst with ip6_dst_ops */ 277/* allocate dst with ip6_dst_ops */
264static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
265 struct net_device *dev, 279 struct net_device *dev,
266 int flags) 280 int flags,
281 struct fib6_table *table)
267{ 282{
268 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_NONE, flags);
269 285
270 if (rt) 286 if (rt) {
271 memset(&rt->rt6i_table, 0, 287 struct dst_entry *dst = &rt->dst;
272 sizeof(*rt) - sizeof(struct dst_entry));
273 288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 }
274 return rt; 292 return rt;
275} 293}
276 294
@@ -278,7 +296,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
278{ 296{
279 struct rt6_info *rt = (struct rt6_info *)dst; 297 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev; 298 struct inet6_dev *idev = rt->rt6i_idev;
281 struct inet_peer *peer = rt->rt6i_peer; 299
300 if (rt->n)
301 neigh_release(rt->n);
282 302
283 if (!(rt->dst.flags & DST_HOST)) 303 if (!(rt->dst.flags & DST_HOST))
284 dst_destroy_metrics_generic(dst); 304 dst_destroy_metrics_generic(dst);
@@ -291,8 +311,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
291 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) 311 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
292 dst_release(dst->from); 312 dst_release(dst->from);
293 313
294 if (peer) { 314 if (rt6_has_peer(rt)) {
295 rt->rt6i_peer = NULL; 315 struct inet_peer *peer = rt6_peer_ptr(rt);
296 inet_putpeer(peer); 316 inet_putpeer(peer);
297 } 317 }
298} 318}
@@ -306,13 +326,20 @@ static u32 rt6_peer_genid(void)
306 326
307void rt6_bind_peer(struct rt6_info *rt, int create) 327void rt6_bind_peer(struct rt6_info *rt, int create)
308{ 328{
329 struct inet_peer_base *base;
309 struct inet_peer *peer; 330 struct inet_peer *peer;
310 331
311 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); 332 base = inetpeer_base_ptr(rt->_rt6i_peer);
312 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) 333 if (!base)
313 inet_putpeer(peer); 334 return;
314 else 335
315 rt->rt6i_peer_genid = rt6_peer_genid(); 336 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
337 if (peer) {
338 if (!rt6_set_peer(rt, peer))
339 inet_putpeer(peer);
340 else
341 rt->rt6i_peer_genid = rt6_peer_genid();
342 }
316} 343}
317 344
318static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 345static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -323,12 +350,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
323 struct net_device *loopback_dev = 350 struct net_device *loopback_dev =
324 dev_net(dev)->loopback_dev; 351 dev_net(dev)->loopback_dev;
325 352
326 if (dev != loopback_dev && idev && idev->dev == dev) { 353 if (dev != loopback_dev) {
327 struct inet6_dev *loopback_idev = 354 if (idev && idev->dev == dev) {
328 in6_dev_get(loopback_dev); 355 struct inet6_dev *loopback_idev =
329 if (loopback_idev) { 356 in6_dev_get(loopback_dev);
330 rt->rt6i_idev = loopback_idev; 357 if (loopback_idev) {
331 in6_dev_put(idev); 358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 if (rt->n && rt->n->dev == dev) {
363 rt->n->dev = loopback_dev;
364 dev_hold(loopback_dev);
365 dev_put(dev);
332 } 366 }
333 } 367 }
334} 368}
@@ -418,7 +452,7 @@ static void rt6_probe(struct rt6_info *rt)
418 * to no more than one per minute. 452 * to no more than one per minute.
419 */ 453 */
420 rcu_read_lock(); 454 rcu_read_lock();
421 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; 455 neigh = rt ? rt->n : NULL;
422 if (!neigh || (neigh->nud_state & NUD_VALID)) 456 if (!neigh || (neigh->nud_state & NUD_VALID))
423 goto out; 457 goto out;
424 read_lock_bh(&neigh->lock); 458 read_lock_bh(&neigh->lock);
@@ -465,7 +499,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
465 int m; 499 int m;
466 500
467 rcu_read_lock(); 501 rcu_read_lock();
468 neigh = dst_get_neighbour_noref(&rt->dst); 502 neigh = rt->n;
469 if (rt->rt6i_flags & RTF_NONEXTHOP || 503 if (rt->rt6i_flags & RTF_NONEXTHOP ||
470 !(rt->rt6i_flags & RTF_GATEWAY)) 504 !(rt->rt6i_flags & RTF_GATEWAY))
471 m = 1; 505 m = 1;
@@ -812,7 +846,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
812 846
813 if (rt) { 847 if (rt) {
814 rt->rt6i_flags |= RTF_CACHE; 848 rt->rt6i_flags |= RTF_CACHE;
815 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); 849 rt->n = neigh_clone(ort->n);
816 } 850 }
817 return rt; 851 return rt;
818} 852}
@@ -846,7 +880,7 @@ restart:
846 dst_hold(&rt->dst); 880 dst_hold(&rt->dst);
847 read_unlock_bh(&table->tb6_lock); 881 read_unlock_bh(&table->tb6_lock);
848 882
849 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) 883 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
850 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 884 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
851 else if (!(rt->dst.flags & DST_HOST)) 885 else if (!(rt->dst.flags & DST_HOST))
852 nrt = rt6_alloc_clone(rt, &fl6->daddr); 886 nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -931,6 +965,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
931{ 965{
932 int flags = 0; 966 int flags = 0;
933 967
968 fl6->flowi6_iif = net->loopback_dev->ifindex;
969
934 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 970 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
935 flags |= RT6_LOOKUP_F_IFACE; 971 flags |= RT6_LOOKUP_F_IFACE;
936 972
@@ -949,12 +985,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
949 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 985 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
950 struct dst_entry *new = NULL; 986 struct dst_entry *new = NULL;
951 987
952 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); 988 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
953 if (rt) { 989 if (rt) {
954 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
955
956 new = &rt->dst; 990 new = &rt->dst;
957 991
992 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
993 rt6_init_peer(rt, net->ipv6.peers);
994
958 new->__use = 1; 995 new->__use = 1;
959 new->input = dst_discard; 996 new->input = dst_discard;
960 new->output = dst_discard; 997 new->output = dst_discard;
@@ -996,7 +1033,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
996 1033
997 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 1034 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
998 if (rt->rt6i_peer_genid != rt6_peer_genid()) { 1035 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
999 if (!rt->rt6i_peer) 1036 if (!rt6_has_peer(rt))
1000 rt6_bind_peer(rt, 0); 1037 rt6_bind_peer(rt, 0);
1001 rt->rt6i_peer_genid = rt6_peer_genid(); 1038 rt->rt6i_peer_genid = rt6_peer_genid();
1002 } 1039 }
@@ -1038,11 +1075,15 @@ static void ip6_link_failure(struct sk_buff *skb)
1038 } 1075 }
1039} 1076}
1040 1077
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1078static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1079 struct sk_buff *skb, u32 mtu)
1042{ 1080{
1043 struct rt6_info *rt6 = (struct rt6_info*)dst; 1081 struct rt6_info *rt6 = (struct rt6_info*)dst;
1044 1082
1083 dst_confirm(dst);
1045 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1084 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1085 struct net *net = dev_net(dst->dev);
1086
1046 rt6->rt6i_flags |= RTF_MODIFIED; 1087 rt6->rt6i_flags |= RTF_MODIFIED;
1047 if (mtu < IPV6_MIN_MTU) { 1088 if (mtu < IPV6_MIN_MTU) {
1048 u32 features = dst_metric(dst, RTAX_FEATURES); 1089 u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1051,9 +1092,66 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1051 dst_metric_set(dst, RTAX_FEATURES, features); 1092 dst_metric_set(dst, RTAX_FEATURES, features);
1052 } 1093 }
1053 dst_metric_set(dst, RTAX_MTU, mtu); 1094 dst_metric_set(dst, RTAX_MTU, mtu);
1095 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1054 } 1096 }
1055} 1097}
1056 1098
1099void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1100 int oif, u32 mark)
1101{
1102 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1103 struct dst_entry *dst;
1104 struct flowi6 fl6;
1105
1106 memset(&fl6, 0, sizeof(fl6));
1107 fl6.flowi6_oif = oif;
1108 fl6.flowi6_mark = mark;
1109 fl6.flowi6_flags = 0;
1110 fl6.daddr = iph->daddr;
1111 fl6.saddr = iph->saddr;
1112 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1113
1114 dst = ip6_route_output(net, NULL, &fl6);
1115 if (!dst->error)
1116 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1117 dst_release(dst);
1118}
1119EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1120
1121void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1122{
1123 ip6_update_pmtu(skb, sock_net(sk), mtu,
1124 sk->sk_bound_dev_if, sk->sk_mark);
1125}
1126EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1127
1128void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1129{
1130 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1131 struct dst_entry *dst;
1132 struct flowi6 fl6;
1133
1134 memset(&fl6, 0, sizeof(fl6));
1135 fl6.flowi6_oif = oif;
1136 fl6.flowi6_mark = mark;
1137 fl6.flowi6_flags = 0;
1138 fl6.daddr = iph->daddr;
1139 fl6.saddr = iph->saddr;
1140 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1141
1142 dst = ip6_route_output(net, NULL, &fl6);
1143 if (!dst->error)
1144 rt6_do_redirect(dst, NULL, skb);
1145 dst_release(dst);
1146}
1147EXPORT_SYMBOL_GPL(ip6_redirect);
1148
1149void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1150{
1151 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1152}
1153EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1154
1057static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1155static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1058{ 1156{
1059 struct net_device *dev = dst->dev; 1157 struct net_device *dev = dst->dev;
@@ -1110,7 +1208,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1110 if (unlikely(!idev)) 1208 if (unlikely(!idev))
1111 return ERR_PTR(-ENODEV); 1209 return ERR_PTR(-ENODEV);
1112 1210
1113 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); 1211 rt = ip6_dst_alloc(net, dev, 0, NULL);
1114 if (unlikely(!rt)) { 1212 if (unlikely(!rt)) {
1115 in6_dev_put(idev); 1213 in6_dev_put(idev);
1116 dst = ERR_PTR(-ENOMEM); 1214 dst = ERR_PTR(-ENOMEM);
@@ -1120,7 +1218,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1120 if (neigh) 1218 if (neigh)
1121 neigh_hold(neigh); 1219 neigh_hold(neigh);
1122 else { 1220 else {
1123 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); 1221 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1124 if (IS_ERR(neigh)) { 1222 if (IS_ERR(neigh)) {
1125 in6_dev_put(idev); 1223 in6_dev_put(idev);
1126 dst_free(&rt->dst); 1224 dst_free(&rt->dst);
@@ -1130,7 +1228,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1130 1228
1131 rt->dst.flags |= DST_HOST; 1229 rt->dst.flags |= DST_HOST;
1132 rt->dst.output = ip6_output; 1230 rt->dst.output = ip6_output;
1133 dst_set_neighbour(&rt->dst, neigh); 1231 rt->n = neigh;
1134 atomic_set(&rt->dst.__refcnt, 1); 1232 atomic_set(&rt->dst.__refcnt, 1);
1135 rt->rt6i_dst.addr = fl6->daddr; 1233 rt->rt6i_dst.addr = fl6->daddr;
1136 rt->rt6i_dst.plen = 128; 1234 rt->rt6i_dst.plen = 128;
@@ -1292,7 +1390,7 @@ int ip6_route_add(struct fib6_config *cfg)
1292 if (!table) 1390 if (!table)
1293 goto out; 1391 goto out;
1294 1392
1295 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); 1393 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1296 1394
1297 if (!rt) { 1395 if (!rt) {
1298 err = -ENOMEM; 1396 err = -ENOMEM;
@@ -1546,107 +1644,94 @@ static int ip6_route_del(struct fib6_config *cfg)
1546 return err; 1644 return err;
1547} 1645}
1548 1646
1549/* 1647static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1550 * Handle redirects
1551 */
1552struct ip6rd_flowi {
1553 struct flowi6 fl6;
1554 struct in6_addr gateway;
1555};
1556
1557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558 struct fib6_table *table,
1559 struct flowi6 *fl6,
1560 int flags)
1561{ 1648{
1562 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 1649 struct net *net = dev_net(skb->dev);
1563 struct rt6_info *rt; 1650 struct netevent_redirect netevent;
1564 struct fib6_node *fn; 1651 struct rt6_info *rt, *nrt = NULL;
1652 const struct in6_addr *target;
1653 struct ndisc_options ndopts;
1654 const struct in6_addr *dest;
1655 struct neighbour *old_neigh;
1656 struct inet6_dev *in6_dev;
1657 struct neighbour *neigh;
1658 struct icmp6hdr *icmph;
1659 int optlen, on_link;
1660 u8 *lladdr;
1565 1661
1566 /* 1662 optlen = skb->tail - skb->transport_header;
1567 * Get the "current" route for this destination and 1663 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1568 * check if the redirect has come from approriate router.
1569 *
1570 * RFC 2461 specifies that redirects should only be
1571 * accepted if they come from the nexthop to the target.
1572 * Due to the way the routes are chosen, this notion
1573 * is a bit fuzzy and one might need to check all possible
1574 * routes.
1575 */
1576 1664
1577 read_lock_bh(&table->tb6_lock); 1665 if (optlen < 0) {
1578 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1666 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1579restart: 1667 return;
1580 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1581 /*
1582 * Current route is on-link; redirect is always invalid.
1583 *
1584 * Seems, previous statement is not true. It could
1585 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586 * But then router serving it might decide, that we should
1587 * know truth 8)8) --ANK (980726).
1588 */
1589 if (rt6_check_expired(rt))
1590 continue;
1591 if (!(rt->rt6i_flags & RTF_GATEWAY))
1592 continue;
1593 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1594 continue;
1595 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1596 continue;
1597 break;
1598 } 1668 }
1599 1669
1600 if (!rt) 1670 icmph = icmp6_hdr(skb);
1601 rt = net->ipv6.ip6_null_entry; 1671 target = (const struct in6_addr *) (icmph + 1);
1602 BACKTRACK(net, &fl6->saddr); 1672 dest = target + 1;
1603out:
1604 dst_hold(&rt->dst);
1605
1606 read_unlock_bh(&table->tb6_lock);
1607
1608 return rt;
1609};
1610 1673
1611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, 1674 if (ipv6_addr_is_multicast(dest)) {
1612 const struct in6_addr *src, 1675 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1613 const struct in6_addr *gateway, 1676 return;
1614 struct net_device *dev) 1677 }
1615{
1616 int flags = RT6_LOOKUP_F_HAS_SADDR;
1617 struct net *net = dev_net(dev);
1618 struct ip6rd_flowi rdfl = {
1619 .fl6 = {
1620 .flowi6_oif = dev->ifindex,
1621 .daddr = *dest,
1622 .saddr = *src,
1623 },
1624 };
1625 1678
1626 rdfl.gateway = *gateway; 1679 on_link = 0;
1680 if (ipv6_addr_equal(dest, target)) {
1681 on_link = 1;
1682 } else if (ipv6_addr_type(target) !=
1683 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1684 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1685 return;
1686 }
1627 1687
1628 if (rt6_need_strict(dest)) 1688 in6_dev = __in6_dev_get(skb->dev);
1629 flags |= RT6_LOOKUP_F_IFACE; 1689 if (!in6_dev)
1690 return;
1691 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1692 return;
1630 1693
1631 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, 1694 /* RFC2461 8.1:
1632 flags, __ip6_route_redirect); 1695 * The IP source address of the Redirect MUST be the same as the current
1633} 1696 * first-hop router for the specified ICMP Destination Address.
1697 */
1634 1698
1635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, 1699 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1636 const struct in6_addr *saddr, 1700 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1637 struct neighbour *neigh, u8 *lladdr, int on_link) 1701 return;
1638{ 1702 }
1639 struct rt6_info *rt, *nrt = NULL;
1640 struct netevent_redirect netevent;
1641 struct net *net = dev_net(neigh->dev);
1642 1703
1643 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1704 lladdr = NULL;
1705 if (ndopts.nd_opts_tgt_lladdr) {
1706 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1707 skb->dev);
1708 if (!lladdr) {
1709 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1710 return;
1711 }
1712 }
1644 1713
1714 rt = (struct rt6_info *) dst;
1645 if (rt == net->ipv6.ip6_null_entry) { 1715 if (rt == net->ipv6.ip6_null_entry) {
1646 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 1716 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1647 goto out; 1717 return;
1648 } 1718 }
1649 1719
1720 /* Redirect received -> path was valid.
1721 * Look, redirects are sent only in response to data packets,
1722 * so that this nexthop apparently is reachable. --ANK
1723 */
1724 dst_confirm(&rt->dst);
1725
1726 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1727 if (!neigh)
1728 return;
1729
1730 /* Duplicate redirect: silently ignore. */
1731 old_neigh = rt->n;
1732 if (neigh == old_neigh)
1733 goto out;
1734
1650 /* 1735 /*
1651 * We have finally decided to accept it. 1736 * We have finally decided to accept it.
1652 */ 1737 */
@@ -1658,17 +1743,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1658 NEIGH_UPDATE_F_ISROUTER)) 1743 NEIGH_UPDATE_F_ISROUTER))
1659 ); 1744 );
1660 1745
1661 /*
1662 * Redirect received -> path was valid.
1663 * Look, redirects are sent only in response to data packets,
1664 * so that this nexthop apparently is reachable. --ANK
1665 */
1666 dst_confirm(&rt->dst);
1667
1668 /* Duplicate redirect: silently ignore. */
1669 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1670 goto out;
1671
1672 nrt = ip6_rt_copy(rt, dest); 1746 nrt = ip6_rt_copy(rt, dest);
1673 if (!nrt) 1747 if (!nrt)
1674 goto out; 1748 goto out;
@@ -1678,132 +1752,25 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1678 nrt->rt6i_flags &= ~RTF_GATEWAY; 1752 nrt->rt6i_flags &= ~RTF_GATEWAY;
1679 1753
1680 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 1754 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1681 dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); 1755 nrt->n = neigh_clone(neigh);
1682 1756
1683 if (ip6_ins_rt(nrt)) 1757 if (ip6_ins_rt(nrt))
1684 goto out; 1758 goto out;
1685 1759
1686 netevent.old = &rt->dst; 1760 netevent.old = &rt->dst;
1761 netevent.old_neigh = old_neigh;
1687 netevent.new = &nrt->dst; 1762 netevent.new = &nrt->dst;
1763 netevent.new_neigh = neigh;
1764 netevent.daddr = dest;
1688 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1765 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1689 1766
1690 if (rt->rt6i_flags & RTF_CACHE) { 1767 if (rt->rt6i_flags & RTF_CACHE) {
1768 rt = (struct rt6_info *) dst_clone(&rt->dst);
1691 ip6_del_rt(rt); 1769 ip6_del_rt(rt);
1692 return;
1693 } 1770 }
1694 1771
1695out: 1772out:
1696 dst_release(&rt->dst); 1773 neigh_release(neigh);
1697}
1698
1699/*
1700 * Handle ICMP "packet too big" messages
1701 * i.e. Path MTU discovery
1702 */
1703
1704static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1705 struct net *net, u32 pmtu, int ifindex)
1706{
1707 struct rt6_info *rt, *nrt;
1708 int allfrag = 0;
1709again:
1710 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1711 if (!rt)
1712 return;
1713
1714 if (rt6_check_expired(rt)) {
1715 ip6_del_rt(rt);
1716 goto again;
1717 }
1718
1719 if (pmtu >= dst_mtu(&rt->dst))
1720 goto out;
1721
1722 if (pmtu < IPV6_MIN_MTU) {
1723 /*
1724 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1725 * MTU (1280) and a fragment header should always be included
1726 * after a node receiving Too Big message reporting PMTU is
1727 * less than the IPv6 Minimum Link MTU.
1728 */
1729 pmtu = IPV6_MIN_MTU;
1730 allfrag = 1;
1731 }
1732
1733 /* New mtu received -> path was valid.
1734 They are sent only in response to data packets,
1735 so that this nexthop apparently is reachable. --ANK
1736 */
1737 dst_confirm(&rt->dst);
1738
1739 /* Host route. If it is static, it would be better
1740 not to override it, but add new one, so that
1741 when cache entry will expire old pmtu
1742 would return automatically.
1743 */
1744 if (rt->rt6i_flags & RTF_CACHE) {
1745 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1746 if (allfrag) {
1747 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1748 features |= RTAX_FEATURE_ALLFRAG;
1749 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1750 }
1751 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1752 rt->rt6i_flags |= RTF_MODIFIED;
1753 goto out;
1754 }
1755
1756 /* Network route.
1757 Two cases are possible:
1758 1. It is connected route. Action: COW
1759 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1760 */
1761 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1762 nrt = rt6_alloc_cow(rt, daddr, saddr);
1763 else
1764 nrt = rt6_alloc_clone(rt, daddr);
1765
1766 if (nrt) {
1767 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1768 if (allfrag) {
1769 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1770 features |= RTAX_FEATURE_ALLFRAG;
1771 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1772 }
1773
1774 /* According to RFC 1981, detecting PMTU increase shouldn't be
1775 * happened within 5 mins, the recommended timer is 10 mins.
1776 * Here this route expiration time is set to ip6_rt_mtu_expires
1777 * which is 10 mins. After 10 mins the decreased pmtu is expired
1778 * and detecting PMTU increase will be automatically happened.
1779 */
1780 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1781 nrt->rt6i_flags |= RTF_DYNAMIC;
1782 ip6_ins_rt(nrt);
1783 }
1784out:
1785 dst_release(&rt->dst);
1786}
1787
1788void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1789 struct net_device *dev, u32 pmtu)
1790{
1791 struct net *net = dev_net(dev);
1792
1793 /*
1794 * RFC 1981 states that a node "MUST reduce the size of the packets it
1795 * is sending along the path" that caused the Packet Too Big message.
1796 * Since it's not possible in the general case to determine which
1797 * interface was used to send the original packet, we update the MTU
1798 * on the interface that will be used to send future packets. We also
1799 * update the MTU on the interface that received the Packet Too Big in
1800 * case the original packet was forced out that interface with
1801 * SO_BINDTODEVICE or similar. This is the next best thing to the
1802 * correct behaviour, which would be to update the MTU on all
1803 * interfaces.
1804 */
1805 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1806 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1807} 1774}
1808 1775
1809/* 1776/*
@@ -1814,8 +1781,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1814 const struct in6_addr *dest) 1781 const struct in6_addr *dest)
1815{ 1782{
1816 struct net *net = dev_net(ort->dst.dev); 1783 struct net *net = dev_net(ort->dst.dev);
1817 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, 1784 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1818 ort->dst.dev, 0); 1785 ort->rt6i_table);
1819 1786
1820 if (rt) { 1787 if (rt) {
1821 rt->dst.input = ort->dst.input; 1788 rt->dst.input = ort->dst.input;
@@ -2099,8 +2066,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2099 bool anycast) 2066 bool anycast)
2100{ 2067{
2101 struct net *net = dev_net(idev->dev); 2068 struct net *net = dev_net(idev->dev);
2102 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, 2069 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2103 net->loopback_dev, 0);
2104 int err; 2070 int err;
2105 2071
2106 if (!rt) { 2072 if (!rt) {
@@ -2396,13 +2362,11 @@ static int rt6_fill_node(struct net *net,
2396 int iif, int type, u32 pid, u32 seq, 2362 int iif, int type, u32 pid, u32 seq,
2397 int prefix, int nowait, unsigned int flags) 2363 int prefix, int nowait, unsigned int flags)
2398{ 2364{
2399 const struct inet_peer *peer;
2400 struct rtmsg *rtm; 2365 struct rtmsg *rtm;
2401 struct nlmsghdr *nlh; 2366 struct nlmsghdr *nlh;
2402 long expires; 2367 long expires;
2403 u32 table; 2368 u32 table;
2404 struct neighbour *n; 2369 struct neighbour *n;
2405 u32 ts, tsage;
2406 2370
2407 if (prefix) { /* user wants prefix routes only */ 2371 if (prefix) { /* user wants prefix routes only */
2408 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2372 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2440,10 +2404,12 @@ static int rt6_fill_node(struct net *net,
2440 rtm->rtm_protocol = rt->rt6i_protocol; 2404 rtm->rtm_protocol = rt->rt6i_protocol;
2441 if (rt->rt6i_flags & RTF_DYNAMIC) 2405 if (rt->rt6i_flags & RTF_DYNAMIC)
2442 rtm->rtm_protocol = RTPROT_REDIRECT; 2406 rtm->rtm_protocol = RTPROT_REDIRECT;
2443 else if (rt->rt6i_flags & RTF_ADDRCONF) 2407 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2444 rtm->rtm_protocol = RTPROT_KERNEL; 2408 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2445 else if (rt->rt6i_flags & RTF_DEFAULT) 2409 rtm->rtm_protocol = RTPROT_RA;
2446 rtm->rtm_protocol = RTPROT_RA; 2410 else
2411 rtm->rtm_protocol = RTPROT_KERNEL;
2412 }
2447 2413
2448 if (rt->rt6i_flags & RTF_CACHE) 2414 if (rt->rt6i_flags & RTF_CACHE)
2449 rtm->rtm_flags |= RTM_F_CLONED; 2415 rtm->rtm_flags |= RTM_F_CLONED;
@@ -2500,7 +2466,7 @@ static int rt6_fill_node(struct net *net,
2500 goto nla_put_failure; 2466 goto nla_put_failure;
2501 2467
2502 rcu_read_lock(); 2468 rcu_read_lock();
2503 n = dst_get_neighbour_noref(&rt->dst); 2469 n = rt->n;
2504 if (n) { 2470 if (n) {
2505 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { 2471 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2506 rcu_read_unlock(); 2472 rcu_read_unlock();
@@ -2521,15 +2487,7 @@ static int rt6_fill_node(struct net *net,
2521 else 2487 else
2522 expires = INT_MAX; 2488 expires = INT_MAX;
2523 2489
2524 peer = rt->rt6i_peer; 2490 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2525 ts = tsage = 0;
2526 if (peer && peer->tcp_ts_stamp) {
2527 ts = peer->tcp_ts;
2528 tsage = get_seconds() - peer->tcp_ts_stamp;
2529 }
2530
2531 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2532 expires, rt->dst.error) < 0)
2533 goto nla_put_failure; 2491 goto nla_put_failure;
2534 2492
2535 return nlmsg_end(skb, nlh); 2493 return nlmsg_end(skb, nlh);
@@ -2722,7 +2680,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2722 seq_puts(m, "00000000000000000000000000000000 00 "); 2680 seq_puts(m, "00000000000000000000000000000000 00 ");
2723#endif 2681#endif
2724 rcu_read_lock(); 2682 rcu_read_lock();
2725 n = dst_get_neighbour_noref(&rt->dst); 2683 n = rt->n;
2726 if (n) { 2684 if (n) {
2727 seq_printf(m, "%pi6", n->primary_key); 2685 seq_printf(m, "%pi6", n->primary_key);
2728 } else { 2686 } else {
@@ -3007,6 +2965,31 @@ static struct pernet_operations ip6_route_net_ops = {
3007 .exit = ip6_route_net_exit, 2965 .exit = ip6_route_net_exit,
3008}; 2966};
3009 2967
2968static int __net_init ipv6_inetpeer_init(struct net *net)
2969{
2970 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2971
2972 if (!bp)
2973 return -ENOMEM;
2974 inet_peer_base_init(bp);
2975 net->ipv6.peers = bp;
2976 return 0;
2977}
2978
2979static void __net_exit ipv6_inetpeer_exit(struct net *net)
2980{
2981 struct inet_peer_base *bp = net->ipv6.peers;
2982
2983 net->ipv6.peers = NULL;
2984 inetpeer_invalidate_tree(bp);
2985 kfree(bp);
2986}
2987
2988static struct pernet_operations ipv6_inetpeer_ops = {
2989 .init = ipv6_inetpeer_init,
2990 .exit = ipv6_inetpeer_exit,
2991};
2992
3010static struct pernet_operations ip6_route_net_late_ops = { 2993static struct pernet_operations ip6_route_net_late_ops = {
3011 .init = ip6_route_net_init_late, 2994 .init = ip6_route_net_init_late,
3012 .exit = ip6_route_net_exit_late, 2995 .exit = ip6_route_net_exit_late,
@@ -3032,10 +3015,14 @@ int __init ip6_route_init(void)
3032 if (ret) 3015 if (ret)
3033 goto out_kmem_cache; 3016 goto out_kmem_cache;
3034 3017
3035 ret = register_pernet_subsys(&ip6_route_net_ops); 3018 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3036 if (ret) 3019 if (ret)
3037 goto out_dst_entries; 3020 goto out_dst_entries;
3038 3021
3022 ret = register_pernet_subsys(&ip6_route_net_ops);
3023 if (ret)
3024 goto out_register_inetpeer;
3025
3039 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3026 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3040 3027
3041 /* Registering of the loopback is done before this portion of code, 3028 /* Registering of the loopback is done before this portion of code,
@@ -3088,6 +3075,8 @@ out_fib6_init:
3088 fib6_gc_cleanup(); 3075 fib6_gc_cleanup();
3089out_register_subsys: 3076out_register_subsys:
3090 unregister_pernet_subsys(&ip6_route_net_ops); 3077 unregister_pernet_subsys(&ip6_route_net_ops);
3078out_register_inetpeer:
3079 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3091out_dst_entries: 3080out_dst_entries:
3092 dst_entries_destroy(&ip6_dst_blackhole_ops); 3081 dst_entries_destroy(&ip6_dst_blackhole_ops);
3093out_kmem_cache: 3082out_kmem_cache:
@@ -3102,6 +3091,7 @@ void ip6_route_cleanup(void)
3102 fib6_rules_cleanup(); 3091 fib6_rules_cleanup();
3103 xfrm6_fini(); 3092 xfrm6_fini();
3104 fib6_gc_cleanup(); 3093 fib6_gc_cleanup();
3094 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3105 unregister_pernet_subsys(&ip6_route_net_ops); 3095 unregister_pernet_subsys(&ip6_route_net_ops);
3106 dst_entries_destroy(&ip6_dst_blackhole_ops); 3096 dst_entries_destroy(&ip6_dst_blackhole_ops);
3107 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3097 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);