about summary refs log tree commit diff stats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- net/ipv6/route.c 680
1 files changed, 409 insertions, 271 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e0d3ad02ffb5..79078747a646 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -72,6 +72,10 @@
72#define RT6_TRACE(x...) do { ; } while (0) 72#define RT6_TRACE(x...) do { ; } while (0)
73#endif 73#endif
74 74
75#define CLONE_OFFLINK_ROUTE 0
76
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
75 79
76static int ip6_rt_max_size = 4096; 80static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2; 81static int ip6_rt_gc_min_interval = HZ / 2;
@@ -94,6 +98,14 @@ static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb); 98static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96 100
101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
97static struct dst_ops ip6_dst_ops = { 109static struct dst_ops ip6_dst_ops = {
98 .family = AF_INET6, 110 .family = AF_INET6,
99 .protocol = __constant_htons(ETH_P_IPV6), 111 .protocol = __constant_htons(ETH_P_IPV6),
@@ -214,150 +226,211 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
214 return rt; 226 return rt;
215} 227}
216 228
229#ifdef CONFIG_IPV6_ROUTER_PREF
230static void rt6_probe(struct rt6_info *rt)
231{
232 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
233 /*
234 * Okay, this does not seem to be appropriate
235 * for now, however, we need to check if it
236 * is really so; aka Router Reachability Probing.
237 *
238 * Router Reachability Probe MUST be rate-limited
239 * to no more than one per minute.
240 */
241 if (!neigh || (neigh->nud_state & NUD_VALID))
242 return;
243 read_lock_bh(&neigh->lock);
244 if (!(neigh->nud_state & NUD_VALID) &&
245 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
246 struct in6_addr mcaddr;
247 struct in6_addr *target;
248
249 neigh->updated = jiffies;
250 read_unlock_bh(&neigh->lock);
251
252 target = (struct in6_addr *)&neigh->primary_key;
253 addrconf_addr_solict_mult(target, &mcaddr);
254 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
255 } else
256 read_unlock_bh(&neigh->lock);
257}
258#else
259static inline void rt6_probe(struct rt6_info *rt)
260{
261 return;
262}
263#endif
264
217/* 265/*
218 * pointer to the last default router chosen. BH is disabled locally. 266 * Default Router Selection (RFC 2461 6.3.6)
219 */ 267 */
220static struct rt6_info *rt6_dflt_pointer; 268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
221static DEFINE_SPINLOCK(rt6_dflt_lock); 269{
270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
277}
222 278
223void rt6_reset_dflt_pointer(struct rt6_info *rt) 279static int inline rt6_check_neigh(struct rt6_info *rt)
224{ 280{
225 spin_lock_bh(&rt6_dflt_lock); 281 struct neighbour *neigh = rt->rt6i_nexthop;
226 if (rt == NULL || rt == rt6_dflt_pointer) { 282 int m = 0;
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer); 283 if (neigh) {
228 rt6_dflt_pointer = NULL; 284 read_lock_bh(&neigh->lock);
285 if (neigh->nud_state & NUD_VALID)
286 m = 1;
287 read_unlock_bh(&neigh->lock);
229 } 288 }
230 spin_unlock_bh(&rt6_dflt_lock); 289 return m;
231} 290}
232 291
233/* Default Router Selection (RFC 2461 6.3.6) */ 292static int rt6_score_route(struct rt6_info *rt, int oif,
234static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) 293 int strict)
235{ 294{
236 struct rt6_info *match = NULL; 295 int m = rt6_check_dev(rt, oif);
237 struct rt6_info *sprt; 296 if (!m && (strict & RT6_SELECT_F_IFACE))
238 int mpri = 0; 297 return -1;
239 298#ifdef CONFIG_IPV6_ROUTER_PREF
240 for (sprt = rt; sprt; sprt = sprt->u.next) { 299 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
241 struct neighbour *neigh; 300#endif
242 int m = 0; 301 if (rt6_check_neigh(rt))
243 302 m |= 16;
244 if (!oif || 303 else if (strict & RT6_SELECT_F_REACHABLE)
245 (sprt->rt6i_dev && 304 return -1;
246 sprt->rt6i_dev->ifindex == oif)) 305 return m;
247 m += 8; 306}
248 307
249 if (rt6_check_expired(sprt)) 308static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
250 continue; 309 int strict)
310{
311 struct rt6_info *match = NULL, *last = NULL;
312 struct rt6_info *rt, *rt0 = *head;
313 u32 metric;
314 int mpri = -1;
251 315
252 if (sprt == rt6_dflt_pointer) 316 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
253 m += 4; 317 __FUNCTION__, head, head ? *head : NULL, oif);
254 318
255 if ((neigh = sprt->rt6i_nexthop) != NULL) { 319 for (rt = rt0, metric = rt0->rt6i_metric;
256 read_lock_bh(&neigh->lock); 320 rt && rt->rt6i_metric == metric;
257 switch (neigh->nud_state) { 321 rt = rt->u.next) {
258 case NUD_REACHABLE: 322 int m;
259 m += 3;
260 break;
261 323
262 case NUD_STALE: 324 if (rt6_check_expired(rt))
263 case NUD_DELAY: 325 continue;
264 case NUD_PROBE:
265 m += 2;
266 break;
267 326
268 case NUD_NOARP: 327 last = rt;
269 case NUD_PERMANENT:
270 m += 1;
271 break;
272 328
273 case NUD_INCOMPLETE: 329 m = rt6_score_route(rt, oif, strict);
274 default: 330 if (m < 0)
275 read_unlock_bh(&neigh->lock);
276 continue;
277 }
278 read_unlock_bh(&neigh->lock);
279 } else {
280 continue; 331 continue;
281 }
282 332
283 if (m > mpri || m >= 12) { 333 if (m > mpri) {
284 match = sprt; 334 rt6_probe(match);
335 match = rt;
285 mpri = m; 336 mpri = m;
286 if (m >= 12) { 337 } else {
287 /* we choose the last default router if it 338 rt6_probe(rt);
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
291 */
292 break;
293 }
294 } 339 }
295 } 340 }
296 341
297 spin_lock(&rt6_dflt_lock); 342 if (!match &&
298 if (!match) { 343 (strict & RT6_SELECT_F_REACHABLE) &&
299 /* 344 last && last != rt0) {
300 * No default routers are known to be reachable. 345 /* no entries matched; do round-robin */
301 * SHOULD round robin 346 *head = rt0->u.next;
302 */ 347 rt0->u.next = last->u.next;
303 if (rt6_dflt_pointer) { 348 last->u.next = rt0;
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
309 match = sprt;
310 break;
311 }
312 }
313 for (sprt = rt;
314 !match && sprt;
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
319 match = sprt;
320 break;
321 }
322 if (sprt == rt6_dflt_pointer)
323 break;
324 }
325 }
326 } 349 }
327 350
328 if (match) { 351 RT6_TRACE("%s() => %p, score=%d\n",
329 if (rt6_dflt_pointer != match) 352 __FUNCTION__, match, mpri);
330 RT6_TRACE("changed default router: %p->%p\n", 353
331 rt6_dflt_pointer, match); 354 return (match ? match : &ip6_null_entry);
332 rt6_dflt_pointer = match; 355}
356
357#ifdef CONFIG_IPV6_ROUTE_INFO
358int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
359 struct in6_addr *gwaddr)
360{
361 struct route_info *rinfo = (struct route_info *) opt;
362 struct in6_addr prefix_buf, *prefix;
363 unsigned int pref;
364 u32 lifetime;
365 struct rt6_info *rt;
366
367 if (len < sizeof(struct route_info)) {
368 return -EINVAL;
333 } 369 }
334 spin_unlock(&rt6_dflt_lock);
335 370
336 if (!match) { 371 /* Sanity check for prefix_len and length */
337 /* 372 if (rinfo->length > 3) {
338 * Last Resort: if no default routers found, 373 return -EINVAL;
339 * use addrconf default route. 374 } else if (rinfo->prefix_len > 128) {
340 * We don't record this route. 375 return -EINVAL;
341 */ 376 } else if (rinfo->prefix_len > 64) {
342 for (sprt = ip6_routing_table.leaf; 377 if (rinfo->length < 2) {
343 sprt; sprt = sprt->u.next) { 378 return -EINVAL;
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
346 (!oif ||
347 (sprt->rt6i_dev &&
348 sprt->rt6i_dev->ifindex == oif))) {
349 match = sprt;
350 break;
351 }
352 } 379 }
353 if (!match) { 380 } else if (rinfo->prefix_len > 0) {
354 /* no default route. give up. */ 381 if (rinfo->length < 1) {
355 match = &ip6_null_entry; 382 return -EINVAL;
356 } 383 }
357 } 384 }
358 385
359 return match; 386 pref = rinfo->route_pref;
387 if (pref == ICMPV6_ROUTER_PREF_INVALID)
388 pref = ICMPV6_ROUTER_PREF_MEDIUM;
389
390 lifetime = htonl(rinfo->lifetime);
391 if (lifetime == 0xffffffff) {
392 /* infinity */
393 } else if (lifetime > 0x7fffffff/HZ) {
394 /* Avoid arithmetic overflow */
395 lifetime = 0x7fffffff/HZ - 1;
396 }
397
398 if (rinfo->length == 3)
399 prefix = (struct in6_addr *)rinfo->prefix;
400 else {
401 /* this function is safe */
402 ipv6_addr_prefix(&prefix_buf,
403 (struct in6_addr *)rinfo->prefix,
404 rinfo->prefix_len);
405 prefix = &prefix_buf;
406 }
407
408 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
409
410 if (rt && !lifetime) {
411 ip6_del_rt(rt, NULL, NULL, NULL);
412 rt = NULL;
413 }
414
415 if (!rt && lifetime)
416 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
417 pref);
418 else if (rt)
419 rt->rt6i_flags = RTF_ROUTEINFO |
420 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
421
422 if (rt) {
423 if (lifetime == 0xffffffff) {
424 rt->rt6i_flags &= ~RTF_EXPIRES;
425 } else {
426 rt->rt6i_expires = jiffies + HZ * lifetime;
427 rt->rt6i_flags |= RTF_EXPIRES;
428 }
429 dst_release(&rt->u.dst);
430 }
431 return 0;
360} 432}
433#endif
361 434
362struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, 435struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363 int oif, int strict) 436 int oif, int strict)
@@ -397,14 +470,9 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
397 return err; 470 return err;
398} 471}
399 472
400/* No rt6_lock! If COW failed, the function returns dead route entry 473static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
401 with dst->error set to errno value. 474 struct in6_addr *saddr)
402 */
403
404static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
405 struct in6_addr *saddr, struct netlink_skb_parms *req)
406{ 475{
407 int err;
408 struct rt6_info *rt; 476 struct rt6_info *rt;
409 477
410 /* 478 /*
@@ -435,25 +503,30 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
435 503
436 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 504 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
437 505
438 dst_hold(&rt->u.dst); 506 }
439
440 err = ip6_ins_rt(rt, NULL, NULL, req);
441 if (err == 0)
442 return rt;
443 507
444 rt->u.dst.error = err; 508 return rt;
509}
445 510
446 return rt; 511static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
512{
513 struct rt6_info *rt = ip6_rt_copy(ort);
514 if (rt) {
515 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
516 rt->rt6i_dst.plen = 128;
517 rt->rt6i_flags |= RTF_CACHE;
518 if (rt->rt6i_flags & RTF_REJECT)
519 rt->u.dst.error = ort->u.dst.error;
520 rt->u.dst.flags |= DST_HOST;
521 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
447 } 522 }
448 dst_hold(&ip6_null_entry.u.dst); 523 return rt;
449 return &ip6_null_entry;
450} 524}
451 525
452#define BACKTRACK() \ 526#define BACKTRACK() \
453if (rt == &ip6_null_entry && strict) { \ 527if (rt == &ip6_null_entry) { \
454 while ((fn = fn->parent) != NULL) { \ 528 while ((fn = fn->parent) != NULL) { \
455 if (fn->fn_flags & RTN_ROOT) { \ 529 if (fn->fn_flags & RTN_ROOT) { \
456 dst_hold(&rt->u.dst); \
457 goto out; \ 530 goto out; \
458 } \ 531 } \
459 if (fn->fn_flags & RTN_RTINFO) \ 532 if (fn->fn_flags & RTN_RTINFO) \
@@ -465,115 +538,138 @@ if (rt == &ip6_null_entry && strict) { \
465void ip6_route_input(struct sk_buff *skb) 538void ip6_route_input(struct sk_buff *skb)
466{ 539{
467 struct fib6_node *fn; 540 struct fib6_node *fn;
468 struct rt6_info *rt; 541 struct rt6_info *rt, *nrt;
469 int strict; 542 int strict;
470 int attempts = 3; 543 int attempts = 3;
544 int err;
545 int reachable = RT6_SELECT_F_REACHABLE;
471 546
472 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); 547 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
473 548
474relookup: 549relookup:
475 read_lock_bh(&rt6_lock); 550 read_lock_bh(&rt6_lock);
476 551
552restart_2:
477 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, 553 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
478 &skb->nh.ipv6h->saddr); 554 &skb->nh.ipv6h->saddr);
479 555
480restart: 556restart:
481 rt = fn->leaf; 557 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
482
483 if ((rt->rt6i_flags & RTF_CACHE)) {
484 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
485 BACKTRACK();
486 dst_hold(&rt->u.dst);
487 goto out;
488 }
489
490 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
491 BACKTRACK(); 558 BACKTRACK();
559 if (rt == &ip6_null_entry ||
560 rt->rt6i_flags & RTF_CACHE)
561 goto out;
492 562
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 563 dst_hold(&rt->u.dst);
494 struct rt6_info *nrt; 564 read_unlock_bh(&rt6_lock);
495 dst_hold(&rt->u.dst);
496 read_unlock_bh(&rt6_lock);
497 565
498 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr, 566 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
499 &skb->nh.ipv6h->saddr, 567 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
500 &NETLINK_CB(skb)); 568 else {
569#if CLONE_OFFLINK_ROUTE
570 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
571#else
572 goto out2;
573#endif
574 }
501 575
502 dst_release(&rt->u.dst); 576 dst_release(&rt->u.dst);
503 rt = nrt; 577 rt = nrt ? : &ip6_null_entry;
504 578
505 if (rt->u.dst.error != -EEXIST || --attempts <= 0) 579 dst_hold(&rt->u.dst);
580 if (nrt) {
581 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
582 if (!err)
506 goto out2; 583 goto out2;
507
508 /* Race condition! In the gap, when rt6_lock was
509 released someone could insert this route. Relookup.
510 */
511 dst_release(&rt->u.dst);
512 goto relookup;
513 } 584 }
514 dst_hold(&rt->u.dst); 585
586 if (--attempts <= 0)
587 goto out2;
588
589 /*
590 * Race condition! In the gap, when rt6_lock was
591 * released someone could insert this route. Relookup.
592 */
593 dst_release(&rt->u.dst);
594 goto relookup;
515 595
516out: 596out:
597 if (reachable) {
598 reachable = 0;
599 goto restart_2;
600 }
601 dst_hold(&rt->u.dst);
517 read_unlock_bh(&rt6_lock); 602 read_unlock_bh(&rt6_lock);
518out2: 603out2:
519 rt->u.dst.lastuse = jiffies; 604 rt->u.dst.lastuse = jiffies;
520 rt->u.dst.__use++; 605 rt->u.dst.__use++;
521 skb->dst = (struct dst_entry *) rt; 606 skb->dst = (struct dst_entry *) rt;
607 return;
522} 608}
523 609
524struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) 610struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
525{ 611{
526 struct fib6_node *fn; 612 struct fib6_node *fn;
527 struct rt6_info *rt; 613 struct rt6_info *rt, *nrt;
528 int strict; 614 int strict;
529 int attempts = 3; 615 int attempts = 3;
616 int err;
617 int reachable = RT6_SELECT_F_REACHABLE;
530 618
531 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); 619 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
532 620
533relookup: 621relookup:
534 read_lock_bh(&rt6_lock); 622 read_lock_bh(&rt6_lock);
535 623
624restart_2:
536 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); 625 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
537 626
538restart: 627restart:
539 rt = fn->leaf; 628 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
540 629 BACKTRACK();
541 if ((rt->rt6i_flags & RTF_CACHE)) { 630 if (rt == &ip6_null_entry ||
542 rt = rt6_device_match(rt, fl->oif, strict); 631 rt->rt6i_flags & RTF_CACHE)
543 BACKTRACK();
544 dst_hold(&rt->u.dst);
545 goto out; 632 goto out;
546 }
547 if (rt->rt6i_flags & RTF_DEFAULT) {
548 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
549 rt = rt6_best_dflt(rt, fl->oif);
550 } else {
551 rt = rt6_device_match(rt, fl->oif, strict);
552 BACKTRACK();
553 }
554 633
555 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 634 dst_hold(&rt->u.dst);
556 struct rt6_info *nrt; 635 read_unlock_bh(&rt6_lock);
557 dst_hold(&rt->u.dst);
558 read_unlock_bh(&rt6_lock);
559 636
560 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL); 637 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
638 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
639 else {
640#if CLONE_OFFLINK_ROUTE
641 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
642#else
643 goto out2;
644#endif
645 }
561 646
562 dst_release(&rt->u.dst); 647 dst_release(&rt->u.dst);
563 rt = nrt; 648 rt = nrt ? : &ip6_null_entry;
564 649
565 if (rt->u.dst.error != -EEXIST || --attempts <= 0) 650 dst_hold(&rt->u.dst);
651 if (nrt) {
652 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
653 if (!err)
566 goto out2; 654 goto out2;
567
568 /* Race condition! In the gap, when rt6_lock was
569 released someone could insert this route. Relookup.
570 */
571 dst_release(&rt->u.dst);
572 goto relookup;
573 } 655 }
574 dst_hold(&rt->u.dst); 656
657 if (--attempts <= 0)
658 goto out2;
659
660 /*
661 * Race condition! In the gap, when rt6_lock was
662 * released someone could insert this route. Relookup.
663 */
664 dst_release(&rt->u.dst);
665 goto relookup;
575 666
576out: 667out:
668 if (reachable) {
669 reachable = 0;
670 goto restart_2;
671 }
672 dst_hold(&rt->u.dst);
577 read_unlock_bh(&rt6_lock); 673 read_unlock_bh(&rt6_lock);
578out2: 674out2:
579 rt->u.dst.lastuse = jiffies; 675 rt->u.dst.lastuse = jiffies;
@@ -999,8 +1095,6 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct
999 1095
1000 write_lock_bh(&rt6_lock); 1096 write_lock_bh(&rt6_lock);
1001 1097
1002 rt6_reset_dflt_pointer(NULL);
1003
1004 err = fib6_del(rt, nlh, _rtattr, req); 1098 err = fib6_del(rt, nlh, _rtattr, req);
1005 dst_release(&rt->u.dst); 1099 dst_release(&rt->u.dst);
1006 1100
@@ -1050,59 +1144,63 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
1050void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, 1144void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1051 struct neighbour *neigh, u8 *lladdr, int on_link) 1145 struct neighbour *neigh, u8 *lladdr, int on_link)
1052{ 1146{
1053 struct rt6_info *rt, *nrt; 1147 struct rt6_info *rt, *nrt = NULL;
1054 1148 int strict;
1055 /* Locate old route to this destination. */ 1149 struct fib6_node *fn;
1056 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1057
1058 if (rt == NULL)
1059 return;
1060
1061 if (neigh->dev != rt->rt6i_dev)
1062 goto out;
1063 1150
1064 /* 1151 /*
1065 * Current route is on-link; redirect is always invalid. 1152 * Get the "current" route for this destination and
1066 * 1153 * check if the redirect has come from an appropriate router.
1067 * Seems, previous statement is not true. It could 1154 *
1068 * be node, which looks for us as on-link (f.e. proxy ndisc) 1155 * RFC 2461 specifies that redirects should only be
1069 * But then router serving it might decide, that we should 1156 * accepted if they come from the nexthop to the target.
1070 * know truth 8)8) --ANK (980726). 1157 * Due to the way the routes are chosen, this notion
1158 * is a bit fuzzy and one might need to check all possible
1159 * routes.
1071 */ 1160 */
1072 if (!(rt->rt6i_flags&RTF_GATEWAY)) 1161 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
1073 goto out;
1074 1162
1075 /* 1163 read_lock_bh(&rt6_lock);
1076 * RFC 2461 specifies that redirects should only be 1164 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1077 * accepted if they come from the nexthop to the target. 1165restart:
1078 * Due to the way default routers are chosen, this notion 1166 for (rt = fn->leaf; rt; rt = rt->u.next) {
1079 * is a bit fuzzy and one might need to check all default 1167 /*
1080 * routers. 1168 * Current route is on-link; redirect is always invalid.
1081 */ 1169 *
1082 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) { 1170 * Seems, previous statement is not true. It could
1083 if (rt->rt6i_flags & RTF_DEFAULT) { 1171 * be node, which looks for us as on-link (f.e. proxy ndisc)
1084 struct rt6_info *rt1; 1172 * But then router serving it might decide, that we should
1085 1173 * know truth 8)8) --ANK (980726).
1086 read_lock(&rt6_lock); 1174 */
1087 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) { 1175 if (rt6_check_expired(rt))
1088 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) { 1176 continue;
1089 dst_hold(&rt1->u.dst); 1177 if (!(rt->rt6i_flags & RTF_GATEWAY))
1090 dst_release(&rt->u.dst); 1178 continue;
1091 read_unlock(&rt6_lock); 1179 if (neigh->dev != rt->rt6i_dev)
1092 rt = rt1; 1180 continue;
1093 goto source_ok; 1181 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1094 } 1182 continue;
1095 } 1183 break;
1096 read_unlock(&rt6_lock); 1184 }
1185 if (rt)
1186 dst_hold(&rt->u.dst);
1187 else if (strict) {
1188 while ((fn = fn->parent) != NULL) {
1189 if (fn->fn_flags & RTN_ROOT)
1190 break;
1191 if (fn->fn_flags & RTN_RTINFO)
1192 goto restart;
1097 } 1193 }
1194 }
1195 read_unlock_bh(&rt6_lock);
1196
1197 if (!rt) {
1098 if (net_ratelimit()) 1198 if (net_ratelimit())
1099 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1199 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1100 "for redirect target\n"); 1200 "for redirect target\n");
1101 goto out; 1201 return;
1102 } 1202 }
1103 1203
1104source_ok:
1105
1106 /* 1204 /*
1107 * We have finally decided to accept it. 1205 * We have finally decided to accept it.
1108 */ 1206 */
@@ -1210,38 +1308,27 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1210 1. It is connected route. Action: COW 1308 1. It is connected route. Action: COW
1211 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1309 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1212 */ 1310 */
1213 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 1311 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1214 nrt = rt6_cow(rt, daddr, saddr, NULL); 1312 nrt = rt6_alloc_cow(rt, daddr, saddr);
1215 if (!nrt->u.dst.error) { 1313 else
1216 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1314 nrt = rt6_alloc_clone(rt, daddr);
1217 if (allfrag) 1315
1218 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1316 if (nrt) {
1219 /* According to RFC 1981, detection of a PMTU increase must not
1220 happen within 5 mins; the recommended timer is 10 mins.
1221 Here this route expiration time is set to ip6_rt_mtu_expires
1222 which is 10 mins. After 10 mins the decreased pmtu expires
1223 and detection of a PMTU increase happens automatically.
1224 */
1225 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1226 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1227 }
1228 dst_release(&nrt->u.dst);
1229 } else {
1230 nrt = ip6_rt_copy(rt);
1231 if (nrt == NULL)
1232 goto out;
1233 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1234 nrt->rt6i_dst.plen = 128;
1235 nrt->u.dst.flags |= DST_HOST;
1236 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1237 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1238 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1239 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1317 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1240 if (allfrag) 1318 if (allfrag)
1241 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1319 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1320
1321 /* According to RFC 1981, detection of a PMTU increase must not
1322 * happen within 5 mins; the recommended timer is 10 mins.
1323 * Here this route expiration time is set to ip6_rt_mtu_expires
1324 * which is 10 mins. After 10 mins the decreased pmtu expires
1325 * and detection of a PMTU increase happens automatically.
1326 */
1327 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1328 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1329
1242 ip6_ins_rt(nrt, NULL, NULL, NULL); 1330 ip6_ins_rt(nrt, NULL, NULL, NULL);
1243 } 1331 }
1244
1245out: 1332out:
1246 dst_release(&rt->u.dst); 1333 dst_release(&rt->u.dst);
1247} 1334}
@@ -1280,6 +1367,57 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1280 return rt; 1367 return rt;
1281} 1368}
1282 1369
1370#ifdef CONFIG_IPV6_ROUTE_INFO
1371static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1372 struct in6_addr *gwaddr, int ifindex)
1373{
1374 struct fib6_node *fn;
1375 struct rt6_info *rt = NULL;
1376
1377 write_lock_bh(&rt6_lock);
1378 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1379 if (!fn)
1380 goto out;
1381
1382 for (rt = fn->leaf; rt; rt = rt->u.next) {
1383 if (rt->rt6i_dev->ifindex != ifindex)
1384 continue;
1385 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1386 continue;
1387 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1388 continue;
1389 dst_hold(&rt->u.dst);
1390 break;
1391 }
1392out:
1393 write_unlock_bh(&rt6_lock);
1394 return rt;
1395}
1396
1397static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1398 struct in6_addr *gwaddr, int ifindex,
1399 unsigned pref)
1400{
1401 struct in6_rtmsg rtmsg;
1402
1403 memset(&rtmsg, 0, sizeof(rtmsg));
1404 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1405 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1406 rtmsg.rtmsg_dst_len = prefixlen;
1407 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1408 rtmsg.rtmsg_metric = 1024;
1409 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1410 /* We should treat it as a default route if prefix length is 0. */
1411 if (!prefixlen)
1412 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1413 rtmsg.rtmsg_ifindex = ifindex;
1414
1415 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1416
1417 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1418}
1419#endif
1420
1283struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1421struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1284{ 1422{
1285 struct rt6_info *rt; 1423 struct rt6_info *rt;
@@ -1290,6 +1428,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
1290 write_lock_bh(&rt6_lock); 1428 write_lock_bh(&rt6_lock);
1291 for (rt = fn->leaf; rt; rt=rt->u.next) { 1429 for (rt = fn->leaf; rt; rt=rt->u.next) {
1292 if (dev == rt->rt6i_dev && 1430 if (dev == rt->rt6i_dev &&
1431 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1293 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1432 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1294 break; 1433 break;
1295 } 1434 }
@@ -1300,7 +1439,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
1300} 1439}
1301 1440
1302struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1441struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1303 struct net_device *dev) 1442 struct net_device *dev,
1443 unsigned int pref)
1304{ 1444{
1305 struct in6_rtmsg rtmsg; 1445 struct in6_rtmsg rtmsg;
1306 1446
@@ -1308,7 +1448,8 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1308 rtmsg.rtmsg_type = RTMSG_NEWROUTE; 1448 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1309 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); 1449 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1310 rtmsg.rtmsg_metric = 1024; 1450 rtmsg.rtmsg_metric = 1024;
1311 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES; 1451 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1452 RTF_PREF(pref);
1312 1453
1313 rtmsg.rtmsg_ifindex = dev->ifindex; 1454 rtmsg.rtmsg_ifindex = dev->ifindex;
1314 1455
@@ -1326,8 +1467,6 @@ restart:
1326 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1467 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1327 dst_hold(&rt->u.dst); 1468 dst_hold(&rt->u.dst);
1328 1469
1329 rt6_reset_dflt_pointer(NULL);
1330
1331 read_unlock_bh(&rt6_lock); 1470 read_unlock_bh(&rt6_lock);
1332 1471
1333 ip6_del_rt(rt, NULL, NULL, NULL); 1472 ip6_del_rt(rt, NULL, NULL, NULL);
@@ -1738,11 +1877,10 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1738 /* 1877 /*
1739 * 2. allocate and initialize walker. 1878 * 2. allocate and initialize walker.
1740 */ 1879 */
1741 w = kmalloc(sizeof(*w), GFP_ATOMIC); 1880 w = kzalloc(sizeof(*w), GFP_ATOMIC);
1742 if (w == NULL) 1881 if (w == NULL)
1743 return -ENOMEM; 1882 return -ENOMEM;
1744 RT6_TRACE("dump<%p", w); 1883 RT6_TRACE("dump<%p", w);
1745 memset(w, 0, sizeof(*w));
1746 w->root = &ip6_routing_table; 1884 w->root = &ip6_routing_table;
1747 w->func = fib6_dump_node; 1885 w->func = fib6_dump_node;
1748 w->args = &arg; 1886 w->args = &arg;