diff options
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 680 |
1 files changed, 409 insertions, 271 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e0d3ad02ffb5..79078747a646 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -72,6 +72,10 @@ | |||
72 | #define RT6_TRACE(x...) do { ; } while (0) | 72 | #define RT6_TRACE(x...) do { ; } while (0) |
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | #define CLONE_OFFLINK_ROUTE 0 | ||
76 | |||
77 | #define RT6_SELECT_F_IFACE 0x1 | ||
78 | #define RT6_SELECT_F_REACHABLE 0x2 | ||
75 | 79 | ||
76 | static int ip6_rt_max_size = 4096; | 80 | static int ip6_rt_max_size = 4096; |
77 | static int ip6_rt_gc_min_interval = HZ / 2; | 81 | static int ip6_rt_gc_min_interval = HZ / 2; |
@@ -94,6 +98,14 @@ static int ip6_pkt_discard_out(struct sk_buff *skb); | |||
94 | static void ip6_link_failure(struct sk_buff *skb); | 98 | static void ip6_link_failure(struct sk_buff *skb); |
95 | static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 99 | static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
96 | 100 | ||
101 | #ifdef CONFIG_IPV6_ROUTE_INFO | ||
102 | static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, | ||
103 | struct in6_addr *gwaddr, int ifindex, | ||
104 | unsigned pref); | ||
105 | static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, | ||
106 | struct in6_addr *gwaddr, int ifindex); | ||
107 | #endif | ||
108 | |||
97 | static struct dst_ops ip6_dst_ops = { | 109 | static struct dst_ops ip6_dst_ops = { |
98 | .family = AF_INET6, | 110 | .family = AF_INET6, |
99 | .protocol = __constant_htons(ETH_P_IPV6), | 111 | .protocol = __constant_htons(ETH_P_IPV6), |
@@ -214,150 +226,211 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, | |||
214 | return rt; | 226 | return rt; |
215 | } | 227 | } |
216 | 228 | ||
229 | #ifdef CONFIG_IPV6_ROUTER_PREF | ||
230 | static void rt6_probe(struct rt6_info *rt) | ||
231 | { | ||
232 | struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; | ||
233 | /* | ||
234 | * Okay, this does not seem to be appropriate | ||
235 | * for now, however, we need to check if it | ||
236 | * is really so; aka Router Reachability Probing. | ||
237 | * | ||
238 | * Router Reachability Probe MUST be rate-limited | ||
239 | * to no more than one per minute. | ||
240 | */ | ||
241 | if (!neigh || (neigh->nud_state & NUD_VALID)) | ||
242 | return; | ||
243 | read_lock_bh(&neigh->lock); | ||
244 | if (!(neigh->nud_state & NUD_VALID) && | ||
245 | time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { | ||
246 | struct in6_addr mcaddr; | ||
247 | struct in6_addr *target; | ||
248 | |||
249 | neigh->updated = jiffies; | ||
250 | read_unlock_bh(&neigh->lock); | ||
251 | |||
252 | target = (struct in6_addr *)&neigh->primary_key; | ||
253 | addrconf_addr_solict_mult(target, &mcaddr); | ||
254 | ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); | ||
255 | } else | ||
256 | read_unlock_bh(&neigh->lock); | ||
257 | } | ||
258 | #else | ||
259 | static inline void rt6_probe(struct rt6_info *rt) | ||
260 | { | ||
261 | return; | ||
262 | } | ||
263 | #endif | ||
264 | |||
217 | /* | 265 | /* |
218 | * pointer to the last default router chosen. BH is disabled locally. | 266 | * Default Router Selection (RFC 2461 6.3.6) |
219 | */ | 267 | */ |
220 | static struct rt6_info *rt6_dflt_pointer; | 268 | static int inline rt6_check_dev(struct rt6_info *rt, int oif) |
221 | static DEFINE_SPINLOCK(rt6_dflt_lock); | 269 | { |
270 | struct net_device *dev = rt->rt6i_dev; | ||
271 | if (!oif || dev->ifindex == oif) | ||
272 | return 2; | ||
273 | if ((dev->flags & IFF_LOOPBACK) && | ||
274 | rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) | ||
275 | return 1; | ||
276 | return 0; | ||
277 | } | ||
222 | 278 | ||
223 | void rt6_reset_dflt_pointer(struct rt6_info *rt) | 279 | static int inline rt6_check_neigh(struct rt6_info *rt) |
224 | { | 280 | { |
225 | spin_lock_bh(&rt6_dflt_lock); | 281 | struct neighbour *neigh = rt->rt6i_nexthop; |
226 | if (rt == NULL || rt == rt6_dflt_pointer) { | 282 | int m = 0; |
227 | RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer); | 283 | if (neigh) { |
228 | rt6_dflt_pointer = NULL; | 284 | read_lock_bh(&neigh->lock); |
285 | if (neigh->nud_state & NUD_VALID) | ||
286 | m = 1; | ||
287 | read_unlock_bh(&neigh->lock); | ||
229 | } | 288 | } |
230 | spin_unlock_bh(&rt6_dflt_lock); | 289 | return m; |
231 | } | 290 | } |
232 | 291 | ||
233 | /* Default Router Selection (RFC 2461 6.3.6) */ | 292 | static int rt6_score_route(struct rt6_info *rt, int oif, |
234 | static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) | 293 | int strict) |
235 | { | 294 | { |
236 | struct rt6_info *match = NULL; | 295 | int m = rt6_check_dev(rt, oif); |
237 | struct rt6_info *sprt; | 296 | if (!m && (strict & RT6_SELECT_F_IFACE)) |
238 | int mpri = 0; | 297 | return -1; |
239 | 298 | #ifdef CONFIG_IPV6_ROUTER_PREF | |
240 | for (sprt = rt; sprt; sprt = sprt->u.next) { | 299 | m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; |
241 | struct neighbour *neigh; | 300 | #endif |
242 | int m = 0; | 301 | if (rt6_check_neigh(rt)) |
243 | 302 | m |= 16; | |
244 | if (!oif || | 303 | else if (strict & RT6_SELECT_F_REACHABLE) |
245 | (sprt->rt6i_dev && | 304 | return -1; |
246 | sprt->rt6i_dev->ifindex == oif)) | 305 | return m; |
247 | m += 8; | 306 | } |
248 | 307 | ||
249 | if (rt6_check_expired(sprt)) | 308 | static struct rt6_info *rt6_select(struct rt6_info **head, int oif, |
250 | continue; | 309 | int strict) |
310 | { | ||
311 | struct rt6_info *match = NULL, *last = NULL; | ||
312 | struct rt6_info *rt, *rt0 = *head; | ||
313 | u32 metric; | ||
314 | int mpri = -1; | ||
251 | 315 | ||
252 | if (sprt == rt6_dflt_pointer) | 316 | RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n", |
253 | m += 4; | 317 | __FUNCTION__, head, head ? *head : NULL, oif); |
254 | 318 | ||
255 | if ((neigh = sprt->rt6i_nexthop) != NULL) { | 319 | for (rt = rt0, metric = rt0->rt6i_metric; |
256 | read_lock_bh(&neigh->lock); | 320 | rt && rt->rt6i_metric == metric; |
257 | switch (neigh->nud_state) { | 321 | rt = rt->u.next) { |
258 | case NUD_REACHABLE: | 322 | int m; |
259 | m += 3; | ||
260 | break; | ||
261 | 323 | ||
262 | case NUD_STALE: | 324 | if (rt6_check_expired(rt)) |
263 | case NUD_DELAY: | 325 | continue; |
264 | case NUD_PROBE: | ||
265 | m += 2; | ||
266 | break; | ||
267 | 326 | ||
268 | case NUD_NOARP: | 327 | last = rt; |
269 | case NUD_PERMANENT: | ||
270 | m += 1; | ||
271 | break; | ||
272 | 328 | ||
273 | case NUD_INCOMPLETE: | 329 | m = rt6_score_route(rt, oif, strict); |
274 | default: | 330 | if (m < 0) |
275 | read_unlock_bh(&neigh->lock); | ||
276 | continue; | ||
277 | } | ||
278 | read_unlock_bh(&neigh->lock); | ||
279 | } else { | ||
280 | continue; | 331 | continue; |
281 | } | ||
282 | 332 | ||
283 | if (m > mpri || m >= 12) { | 333 | if (m > mpri) { |
284 | match = sprt; | 334 | rt6_probe(match); |
335 | match = rt; | ||
285 | mpri = m; | 336 | mpri = m; |
286 | if (m >= 12) { | 337 | } else { |
287 | /* we choose the last default router if it | 338 | rt6_probe(rt); |
288 | * is in (probably) reachable state. | ||
289 | * If route changed, we should do pmtu | ||
290 | * discovery. --yoshfuji | ||
291 | */ | ||
292 | break; | ||
293 | } | ||
294 | } | 339 | } |
295 | } | 340 | } |
296 | 341 | ||
297 | spin_lock(&rt6_dflt_lock); | 342 | if (!match && |
298 | if (!match) { | 343 | (strict & RT6_SELECT_F_REACHABLE) && |
299 | /* | 344 | last && last != rt0) { |
300 | * No default routers are known to be reachable. | 345 | /* no entries matched; do round-robin */ |
301 | * SHOULD round robin | 346 | *head = rt0->u.next; |
302 | */ | 347 | rt0->u.next = last->u.next; |
303 | if (rt6_dflt_pointer) { | 348 | last->u.next = rt0; |
304 | for (sprt = rt6_dflt_pointer->u.next; | ||
305 | sprt; sprt = sprt->u.next) { | ||
306 | if (sprt->u.dst.obsolete <= 0 && | ||
307 | sprt->u.dst.error == 0 && | ||
308 | !rt6_check_expired(sprt)) { | ||
309 | match = sprt; | ||
310 | break; | ||
311 | } | ||
312 | } | ||
313 | for (sprt = rt; | ||
314 | !match && sprt; | ||
315 | sprt = sprt->u.next) { | ||
316 | if (sprt->u.dst.obsolete <= 0 && | ||
317 | sprt->u.dst.error == 0 && | ||
318 | !rt6_check_expired(sprt)) { | ||
319 | match = sprt; | ||
320 | break; | ||
321 | } | ||
322 | if (sprt == rt6_dflt_pointer) | ||
323 | break; | ||
324 | } | ||
325 | } | ||
326 | } | 349 | } |
327 | 350 | ||
328 | if (match) { | 351 | RT6_TRACE("%s() => %p, score=%d\n", |
329 | if (rt6_dflt_pointer != match) | 352 | __FUNCTION__, match, mpri); |
330 | RT6_TRACE("changed default router: %p->%p\n", | 353 | |
331 | rt6_dflt_pointer, match); | 354 | return (match ? match : &ip6_null_entry); |
332 | rt6_dflt_pointer = match; | 355 | } |
356 | |||
357 | #ifdef CONFIG_IPV6_ROUTE_INFO | ||
358 | int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, | ||
359 | struct in6_addr *gwaddr) | ||
360 | { | ||
361 | struct route_info *rinfo = (struct route_info *) opt; | ||
362 | struct in6_addr prefix_buf, *prefix; | ||
363 | unsigned int pref; | ||
364 | u32 lifetime; | ||
365 | struct rt6_info *rt; | ||
366 | |||
367 | if (len < sizeof(struct route_info)) { | ||
368 | return -EINVAL; | ||
333 | } | 369 | } |
334 | spin_unlock(&rt6_dflt_lock); | ||
335 | 370 | ||
336 | if (!match) { | 371 | /* Sanity check for prefix_len and length */ |
337 | /* | 372 | if (rinfo->length > 3) { |
338 | * Last Resort: if no default routers found, | 373 | return -EINVAL; |
339 | * use addrconf default route. | 374 | } else if (rinfo->prefix_len > 128) { |
340 | * We don't record this route. | 375 | return -EINVAL; |
341 | */ | 376 | } else if (rinfo->prefix_len > 64) { |
342 | for (sprt = ip6_routing_table.leaf; | 377 | if (rinfo->length < 2) { |
343 | sprt; sprt = sprt->u.next) { | 378 | return -EINVAL; |
344 | if (!rt6_check_expired(sprt) && | ||
345 | (sprt->rt6i_flags & RTF_DEFAULT) && | ||
346 | (!oif || | ||
347 | (sprt->rt6i_dev && | ||
348 | sprt->rt6i_dev->ifindex == oif))) { | ||
349 | match = sprt; | ||
350 | break; | ||
351 | } | ||
352 | } | 379 | } |
353 | if (!match) { | 380 | } else if (rinfo->prefix_len > 0) { |
354 | /* no default route. give up. */ | 381 | if (rinfo->length < 1) { |
355 | match = &ip6_null_entry; | 382 | return -EINVAL; |
356 | } | 383 | } |
357 | } | 384 | } |
358 | 385 | ||
359 | return match; | 386 | pref = rinfo->route_pref; |
387 | if (pref == ICMPV6_ROUTER_PREF_INVALID) | ||
388 | pref = ICMPV6_ROUTER_PREF_MEDIUM; | ||
389 | |||
390 | lifetime = htonl(rinfo->lifetime); | ||
391 | if (lifetime == 0xffffffff) { | ||
392 | /* infinity */ | ||
393 | } else if (lifetime > 0x7fffffff/HZ) { | ||
394 | /* Avoid arithmetic overflow */ | ||
395 | lifetime = 0x7fffffff/HZ - 1; | ||
396 | } | ||
397 | |||
398 | if (rinfo->length == 3) | ||
399 | prefix = (struct in6_addr *)rinfo->prefix; | ||
400 | else { | ||
401 | /* this function is safe */ | ||
402 | ipv6_addr_prefix(&prefix_buf, | ||
403 | (struct in6_addr *)rinfo->prefix, | ||
404 | rinfo->prefix_len); | ||
405 | prefix = &prefix_buf; | ||
406 | } | ||
407 | |||
408 | rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); | ||
409 | |||
410 | if (rt && !lifetime) { | ||
411 | ip6_del_rt(rt, NULL, NULL, NULL); | ||
412 | rt = NULL; | ||
413 | } | ||
414 | |||
415 | if (!rt && lifetime) | ||
416 | rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, | ||
417 | pref); | ||
418 | else if (rt) | ||
419 | rt->rt6i_flags = RTF_ROUTEINFO | | ||
420 | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); | ||
421 | |||
422 | if (rt) { | ||
423 | if (lifetime == 0xffffffff) { | ||
424 | rt->rt6i_flags &= ~RTF_EXPIRES; | ||
425 | } else { | ||
426 | rt->rt6i_expires = jiffies + HZ * lifetime; | ||
427 | rt->rt6i_flags |= RTF_EXPIRES; | ||
428 | } | ||
429 | dst_release(&rt->u.dst); | ||
430 | } | ||
431 | return 0; | ||
360 | } | 432 | } |
433 | #endif | ||
361 | 434 | ||
362 | struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, | 435 | struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, |
363 | int oif, int strict) | 436 | int oif, int strict) |
@@ -397,14 +470,9 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, | |||
397 | return err; | 470 | return err; |
398 | } | 471 | } |
399 | 472 | ||
400 | /* No rt6_lock! If COW failed, the function returns dead route entry | 473 | static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, |
401 | with dst->error set to errno value. | 474 | struct in6_addr *saddr) |
402 | */ | ||
403 | |||
404 | static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, | ||
405 | struct in6_addr *saddr, struct netlink_skb_parms *req) | ||
406 | { | 475 | { |
407 | int err; | ||
408 | struct rt6_info *rt; | 476 | struct rt6_info *rt; |
409 | 477 | ||
410 | /* | 478 | /* |
@@ -435,25 +503,30 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, | |||
435 | 503 | ||
436 | rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | 504 | rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); |
437 | 505 | ||
438 | dst_hold(&rt->u.dst); | 506 | } |
439 | |||
440 | err = ip6_ins_rt(rt, NULL, NULL, req); | ||
441 | if (err == 0) | ||
442 | return rt; | ||
443 | 507 | ||
444 | rt->u.dst.error = err; | 508 | return rt; |
509 | } | ||
445 | 510 | ||
446 | return rt; | 511 | static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) |
512 | { | ||
513 | struct rt6_info *rt = ip6_rt_copy(ort); | ||
514 | if (rt) { | ||
515 | ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); | ||
516 | rt->rt6i_dst.plen = 128; | ||
517 | rt->rt6i_flags |= RTF_CACHE; | ||
518 | if (rt->rt6i_flags & RTF_REJECT) | ||
519 | rt->u.dst.error = ort->u.dst.error; | ||
520 | rt->u.dst.flags |= DST_HOST; | ||
521 | rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); | ||
447 | } | 522 | } |
448 | dst_hold(&ip6_null_entry.u.dst); | 523 | return rt; |
449 | return &ip6_null_entry; | ||
450 | } | 524 | } |
451 | 525 | ||
452 | #define BACKTRACK() \ | 526 | #define BACKTRACK() \ |
453 | if (rt == &ip6_null_entry && strict) { \ | 527 | if (rt == &ip6_null_entry) { \ |
454 | while ((fn = fn->parent) != NULL) { \ | 528 | while ((fn = fn->parent) != NULL) { \ |
455 | if (fn->fn_flags & RTN_ROOT) { \ | 529 | if (fn->fn_flags & RTN_ROOT) { \ |
456 | dst_hold(&rt->u.dst); \ | ||
457 | goto out; \ | 530 | goto out; \ |
458 | } \ | 531 | } \ |
459 | if (fn->fn_flags & RTN_RTINFO) \ | 532 | if (fn->fn_flags & RTN_RTINFO) \ |
@@ -465,115 +538,138 @@ if (rt == &ip6_null_entry && strict) { \ | |||
465 | void ip6_route_input(struct sk_buff *skb) | 538 | void ip6_route_input(struct sk_buff *skb) |
466 | { | 539 | { |
467 | struct fib6_node *fn; | 540 | struct fib6_node *fn; |
468 | struct rt6_info *rt; | 541 | struct rt6_info *rt, *nrt; |
469 | int strict; | 542 | int strict; |
470 | int attempts = 3; | 543 | int attempts = 3; |
544 | int err; | ||
545 | int reachable = RT6_SELECT_F_REACHABLE; | ||
471 | 546 | ||
472 | strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); | 547 | strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; |
473 | 548 | ||
474 | relookup: | 549 | relookup: |
475 | read_lock_bh(&rt6_lock); | 550 | read_lock_bh(&rt6_lock); |
476 | 551 | ||
552 | restart_2: | ||
477 | fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, | 553 | fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, |
478 | &skb->nh.ipv6h->saddr); | 554 | &skb->nh.ipv6h->saddr); |
479 | 555 | ||
480 | restart: | 556 | restart: |
481 | rt = fn->leaf; | 557 | rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); |
482 | |||
483 | if ((rt->rt6i_flags & RTF_CACHE)) { | ||
484 | rt = rt6_device_match(rt, skb->dev->ifindex, strict); | ||
485 | BACKTRACK(); | ||
486 | dst_hold(&rt->u.dst); | ||
487 | goto out; | ||
488 | } | ||
489 | |||
490 | rt = rt6_device_match(rt, skb->dev->ifindex, strict); | ||
491 | BACKTRACK(); | 558 | BACKTRACK(); |
559 | if (rt == &ip6_null_entry || | ||
560 | rt->rt6i_flags & RTF_CACHE) | ||
561 | goto out; | ||
492 | 562 | ||
493 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { | 563 | dst_hold(&rt->u.dst); |
494 | struct rt6_info *nrt; | 564 | read_unlock_bh(&rt6_lock); |
495 | dst_hold(&rt->u.dst); | ||
496 | read_unlock_bh(&rt6_lock); | ||
497 | 565 | ||
498 | nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr, | 566 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) |
499 | &skb->nh.ipv6h->saddr, | 567 | nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); |
500 | &NETLINK_CB(skb)); | 568 | else { |
569 | #if CLONE_OFFLINK_ROUTE | ||
570 | nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr); | ||
571 | #else | ||
572 | goto out2; | ||
573 | #endif | ||
574 | } | ||
501 | 575 | ||
502 | dst_release(&rt->u.dst); | 576 | dst_release(&rt->u.dst); |
503 | rt = nrt; | 577 | rt = nrt ? : &ip6_null_entry; |
504 | 578 | ||
505 | if (rt->u.dst.error != -EEXIST || --attempts <= 0) | 579 | dst_hold(&rt->u.dst); |
580 | if (nrt) { | ||
581 | err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb)); | ||
582 | if (!err) | ||
506 | goto out2; | 583 | goto out2; |
507 | |||
508 | /* Race condition! In the gap, when rt6_lock was | ||
509 | released someone could insert this route. Relookup. | ||
510 | */ | ||
511 | dst_release(&rt->u.dst); | ||
512 | goto relookup; | ||
513 | } | 584 | } |
514 | dst_hold(&rt->u.dst); | 585 | |
586 | if (--attempts <= 0) | ||
587 | goto out2; | ||
588 | |||
589 | /* | ||
590 | * Race condition! In the gap, when rt6_lock was | ||
591 | * released someone could insert this route. Relookup. | ||
592 | */ | ||
593 | dst_release(&rt->u.dst); | ||
594 | goto relookup; | ||
515 | 595 | ||
516 | out: | 596 | out: |
597 | if (reachable) { | ||
598 | reachable = 0; | ||
599 | goto restart_2; | ||
600 | } | ||
601 | dst_hold(&rt->u.dst); | ||
517 | read_unlock_bh(&rt6_lock); | 602 | read_unlock_bh(&rt6_lock); |
518 | out2: | 603 | out2: |
519 | rt->u.dst.lastuse = jiffies; | 604 | rt->u.dst.lastuse = jiffies; |
520 | rt->u.dst.__use++; | 605 | rt->u.dst.__use++; |
521 | skb->dst = (struct dst_entry *) rt; | 606 | skb->dst = (struct dst_entry *) rt; |
607 | return; | ||
522 | } | 608 | } |
523 | 609 | ||
524 | struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) | 610 | struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) |
525 | { | 611 | { |
526 | struct fib6_node *fn; | 612 | struct fib6_node *fn; |
527 | struct rt6_info *rt; | 613 | struct rt6_info *rt, *nrt; |
528 | int strict; | 614 | int strict; |
529 | int attempts = 3; | 615 | int attempts = 3; |
616 | int err; | ||
617 | int reachable = RT6_SELECT_F_REACHABLE; | ||
530 | 618 | ||
531 | strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); | 619 | strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; |
532 | 620 | ||
533 | relookup: | 621 | relookup: |
534 | read_lock_bh(&rt6_lock); | 622 | read_lock_bh(&rt6_lock); |
535 | 623 | ||
624 | restart_2: | ||
536 | fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); | 625 | fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); |
537 | 626 | ||
538 | restart: | 627 | restart: |
539 | rt = fn->leaf; | 628 | rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); |
540 | 629 | BACKTRACK(); | |
541 | if ((rt->rt6i_flags & RTF_CACHE)) { | 630 | if (rt == &ip6_null_entry || |
542 | rt = rt6_device_match(rt, fl->oif, strict); | 631 | rt->rt6i_flags & RTF_CACHE) |
543 | BACKTRACK(); | ||
544 | dst_hold(&rt->u.dst); | ||
545 | goto out; | 632 | goto out; |
546 | } | ||
547 | if (rt->rt6i_flags & RTF_DEFAULT) { | ||
548 | if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) | ||
549 | rt = rt6_best_dflt(rt, fl->oif); | ||
550 | } else { | ||
551 | rt = rt6_device_match(rt, fl->oif, strict); | ||
552 | BACKTRACK(); | ||
553 | } | ||
554 | 633 | ||
555 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { | 634 | dst_hold(&rt->u.dst); |
556 | struct rt6_info *nrt; | 635 | read_unlock_bh(&rt6_lock); |
557 | dst_hold(&rt->u.dst); | ||
558 | read_unlock_bh(&rt6_lock); | ||
559 | 636 | ||
560 | nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL); | 637 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) |
638 | nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); | ||
639 | else { | ||
640 | #if CLONE_OFFLINK_ROUTE | ||
641 | nrt = rt6_alloc_clone(rt, &fl->fl6_dst); | ||
642 | #else | ||
643 | goto out2; | ||
644 | #endif | ||
645 | } | ||
561 | 646 | ||
562 | dst_release(&rt->u.dst); | 647 | dst_release(&rt->u.dst); |
563 | rt = nrt; | 648 | rt = nrt ? : &ip6_null_entry; |
564 | 649 | ||
565 | if (rt->u.dst.error != -EEXIST || --attempts <= 0) | 650 | dst_hold(&rt->u.dst); |
651 | if (nrt) { | ||
652 | err = ip6_ins_rt(nrt, NULL, NULL, NULL); | ||
653 | if (!err) | ||
566 | goto out2; | 654 | goto out2; |
567 | |||
568 | /* Race condition! In the gap, when rt6_lock was | ||
569 | released someone could insert this route. Relookup. | ||
570 | */ | ||
571 | dst_release(&rt->u.dst); | ||
572 | goto relookup; | ||
573 | } | 655 | } |
574 | dst_hold(&rt->u.dst); | 656 | |
657 | if (--attempts <= 0) | ||
658 | goto out2; | ||
659 | |||
660 | /* | ||
661 | * Race condition! In the gap, when rt6_lock was | ||
662 | * released someone could insert this route. Relookup. | ||
663 | */ | ||
664 | dst_release(&rt->u.dst); | ||
665 | goto relookup; | ||
575 | 666 | ||
576 | out: | 667 | out: |
668 | if (reachable) { | ||
669 | reachable = 0; | ||
670 | goto restart_2; | ||
671 | } | ||
672 | dst_hold(&rt->u.dst); | ||
577 | read_unlock_bh(&rt6_lock); | 673 | read_unlock_bh(&rt6_lock); |
578 | out2: | 674 | out2: |
579 | rt->u.dst.lastuse = jiffies; | 675 | rt->u.dst.lastuse = jiffies; |
@@ -999,8 +1095,6 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct | |||
999 | 1095 | ||
1000 | write_lock_bh(&rt6_lock); | 1096 | write_lock_bh(&rt6_lock); |
1001 | 1097 | ||
1002 | rt6_reset_dflt_pointer(NULL); | ||
1003 | |||
1004 | err = fib6_del(rt, nlh, _rtattr, req); | 1098 | err = fib6_del(rt, nlh, _rtattr, req); |
1005 | dst_release(&rt->u.dst); | 1099 | dst_release(&rt->u.dst); |
1006 | 1100 | ||
@@ -1050,59 +1144,63 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r | |||
1050 | void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, | 1144 | void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, |
1051 | struct neighbour *neigh, u8 *lladdr, int on_link) | 1145 | struct neighbour *neigh, u8 *lladdr, int on_link) |
1052 | { | 1146 | { |
1053 | struct rt6_info *rt, *nrt; | 1147 | struct rt6_info *rt, *nrt = NULL; |
1054 | 1148 | int strict; | |
1055 | /* Locate old route to this destination. */ | 1149 | struct fib6_node *fn; |
1056 | rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1); | ||
1057 | |||
1058 | if (rt == NULL) | ||
1059 | return; | ||
1060 | |||
1061 | if (neigh->dev != rt->rt6i_dev) | ||
1062 | goto out; | ||
1063 | 1150 | ||
1064 | /* | 1151 | /* |
1065 | * Current route is on-link; redirect is always invalid. | 1152 | * Get the "current" route for this destination and |
1066 | * | 1153 | * check if the redirect has come from approriate router. |
1067 | * Seems, previous statement is not true. It could | 1154 | * |
1068 | * be node, which looks for us as on-link (f.e. proxy ndisc) | 1155 | * RFC 2461 specifies that redirects should only be |
1069 | * But then router serving it might decide, that we should | 1156 | * accepted if they come from the nexthop to the target. |
1070 | * know truth 8)8) --ANK (980726). | 1157 | * Due to the way the routes are chosen, this notion |
1158 | * is a bit fuzzy and one might need to check all possible | ||
1159 | * routes. | ||
1071 | */ | 1160 | */ |
1072 | if (!(rt->rt6i_flags&RTF_GATEWAY)) | 1161 | strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL); |
1073 | goto out; | ||
1074 | 1162 | ||
1075 | /* | 1163 | read_lock_bh(&rt6_lock); |
1076 | * RFC 2461 specifies that redirects should only be | 1164 | fn = fib6_lookup(&ip6_routing_table, dest, NULL); |
1077 | * accepted if they come from the nexthop to the target. | 1165 | restart: |
1078 | * Due to the way default routers are chosen, this notion | 1166 | for (rt = fn->leaf; rt; rt = rt->u.next) { |
1079 | * is a bit fuzzy and one might need to check all default | 1167 | /* |
1080 | * routers. | 1168 | * Current route is on-link; redirect is always invalid. |
1081 | */ | 1169 | * |
1082 | if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) { | 1170 | * Seems, previous statement is not true. It could |
1083 | if (rt->rt6i_flags & RTF_DEFAULT) { | 1171 | * be node, which looks for us as on-link (f.e. proxy ndisc) |
1084 | struct rt6_info *rt1; | 1172 | * But then router serving it might decide, that we should |
1085 | 1173 | * know truth 8)8) --ANK (980726). | |
1086 | read_lock(&rt6_lock); | 1174 | */ |
1087 | for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) { | 1175 | if (rt6_check_expired(rt)) |
1088 | if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) { | 1176 | continue; |
1089 | dst_hold(&rt1->u.dst); | 1177 | if (!(rt->rt6i_flags & RTF_GATEWAY)) |
1090 | dst_release(&rt->u.dst); | 1178 | continue; |
1091 | read_unlock(&rt6_lock); | 1179 | if (neigh->dev != rt->rt6i_dev) |
1092 | rt = rt1; | 1180 | continue; |
1093 | goto source_ok; | 1181 | if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) |
1094 | } | 1182 | continue; |
1095 | } | 1183 | break; |
1096 | read_unlock(&rt6_lock); | 1184 | } |
1185 | if (rt) | ||
1186 | dst_hold(&rt->u.dst); | ||
1187 | else if (strict) { | ||
1188 | while ((fn = fn->parent) != NULL) { | ||
1189 | if (fn->fn_flags & RTN_ROOT) | ||
1190 | break; | ||
1191 | if (fn->fn_flags & RTN_RTINFO) | ||
1192 | goto restart; | ||
1097 | } | 1193 | } |
1194 | } | ||
1195 | read_unlock_bh(&rt6_lock); | ||
1196 | |||
1197 | if (!rt) { | ||
1098 | if (net_ratelimit()) | 1198 | if (net_ratelimit()) |
1099 | printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " | 1199 | printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " |
1100 | "for redirect target\n"); | 1200 | "for redirect target\n"); |
1101 | goto out; | 1201 | return; |
1102 | } | 1202 | } |
1103 | 1203 | ||
1104 | source_ok: | ||
1105 | |||
1106 | /* | 1204 | /* |
1107 | * We have finally decided to accept it. | 1205 | * We have finally decided to accept it. |
1108 | */ | 1206 | */ |
@@ -1210,38 +1308,27 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, | |||
1210 | 1. It is connected route. Action: COW | 1308 | 1. It is connected route. Action: COW |
1211 | 2. It is gatewayed route or NONEXTHOP route. Action: clone it. | 1309 | 2. It is gatewayed route or NONEXTHOP route. Action: clone it. |
1212 | */ | 1310 | */ |
1213 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { | 1311 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) |
1214 | nrt = rt6_cow(rt, daddr, saddr, NULL); | 1312 | nrt = rt6_alloc_cow(rt, daddr, saddr); |
1215 | if (!nrt->u.dst.error) { | 1313 | else |
1216 | nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; | 1314 | nrt = rt6_alloc_clone(rt, daddr); |
1217 | if (allfrag) | 1315 | |
1218 | nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; | 1316 | if (nrt) { |
1219 | /* According to RFC 1981, detecting PMTU increase shouldn't be | ||
1220 | happened within 5 mins, the recommended timer is 10 mins. | ||
1221 | Here this route expiration time is set to ip6_rt_mtu_expires | ||
1222 | which is 10 mins. After 10 mins the decreased pmtu is expired | ||
1223 | and detecting PMTU increase will be automatically happened. | ||
1224 | */ | ||
1225 | dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); | ||
1226 | nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; | ||
1227 | } | ||
1228 | dst_release(&nrt->u.dst); | ||
1229 | } else { | ||
1230 | nrt = ip6_rt_copy(rt); | ||
1231 | if (nrt == NULL) | ||
1232 | goto out; | ||
1233 | ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr); | ||
1234 | nrt->rt6i_dst.plen = 128; | ||
1235 | nrt->u.dst.flags |= DST_HOST; | ||
1236 | nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop); | ||
1237 | dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); | ||
1238 | nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; | ||
1239 | nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; | 1317 | nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; |
1240 | if (allfrag) | 1318 | if (allfrag) |
1241 | nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; | 1319 | nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; |
1320 | |||
1321 | /* According to RFC 1981, detecting PMTU increase shouldn't be | ||
1322 | * happened within 5 mins, the recommended timer is 10 mins. | ||
1323 | * Here this route expiration time is set to ip6_rt_mtu_expires | ||
1324 | * which is 10 mins. After 10 mins the decreased pmtu is expired | ||
1325 | * and detecting PMTU increase will be automatically happened. | ||
1326 | */ | ||
1327 | dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); | ||
1328 | nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; | ||
1329 | |||
1242 | ip6_ins_rt(nrt, NULL, NULL, NULL); | 1330 | ip6_ins_rt(nrt, NULL, NULL, NULL); |
1243 | } | 1331 | } |
1244 | |||
1245 | out: | 1332 | out: |
1246 | dst_release(&rt->u.dst); | 1333 | dst_release(&rt->u.dst); |
1247 | } | 1334 | } |
@@ -1280,6 +1367,57 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) | |||
1280 | return rt; | 1367 | return rt; |
1281 | } | 1368 | } |
1282 | 1369 | ||
1370 | #ifdef CONFIG_IPV6_ROUTE_INFO | ||
1371 | static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, | ||
1372 | struct in6_addr *gwaddr, int ifindex) | ||
1373 | { | ||
1374 | struct fib6_node *fn; | ||
1375 | struct rt6_info *rt = NULL; | ||
1376 | |||
1377 | write_lock_bh(&rt6_lock); | ||
1378 | fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); | ||
1379 | if (!fn) | ||
1380 | goto out; | ||
1381 | |||
1382 | for (rt = fn->leaf; rt; rt = rt->u.next) { | ||
1383 | if (rt->rt6i_dev->ifindex != ifindex) | ||
1384 | continue; | ||
1385 | if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) | ||
1386 | continue; | ||
1387 | if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) | ||
1388 | continue; | ||
1389 | dst_hold(&rt->u.dst); | ||
1390 | break; | ||
1391 | } | ||
1392 | out: | ||
1393 | write_unlock_bh(&rt6_lock); | ||
1394 | return rt; | ||
1395 | } | ||
1396 | |||
1397 | static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, | ||
1398 | struct in6_addr *gwaddr, int ifindex, | ||
1399 | unsigned pref) | ||
1400 | { | ||
1401 | struct in6_rtmsg rtmsg; | ||
1402 | |||
1403 | memset(&rtmsg, 0, sizeof(rtmsg)); | ||
1404 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | ||
1405 | ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix); | ||
1406 | rtmsg.rtmsg_dst_len = prefixlen; | ||
1407 | ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); | ||
1408 | rtmsg.rtmsg_metric = 1024; | ||
1409 | rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref); | ||
1410 | /* We should treat it as a default route if prefix length is 0. */ | ||
1411 | if (!prefixlen) | ||
1412 | rtmsg.rtmsg_flags |= RTF_DEFAULT; | ||
1413 | rtmsg.rtmsg_ifindex = ifindex; | ||
1414 | |||
1415 | ip6_route_add(&rtmsg, NULL, NULL, NULL); | ||
1416 | |||
1417 | return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); | ||
1418 | } | ||
1419 | #endif | ||
1420 | |||
1283 | struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) | 1421 | struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) |
1284 | { | 1422 | { |
1285 | struct rt6_info *rt; | 1423 | struct rt6_info *rt; |
@@ -1290,6 +1428,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d | |||
1290 | write_lock_bh(&rt6_lock); | 1428 | write_lock_bh(&rt6_lock); |
1291 | for (rt = fn->leaf; rt; rt=rt->u.next) { | 1429 | for (rt = fn->leaf; rt; rt=rt->u.next) { |
1292 | if (dev == rt->rt6i_dev && | 1430 | if (dev == rt->rt6i_dev && |
1431 | ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && | ||
1293 | ipv6_addr_equal(&rt->rt6i_gateway, addr)) | 1432 | ipv6_addr_equal(&rt->rt6i_gateway, addr)) |
1294 | break; | 1433 | break; |
1295 | } | 1434 | } |
@@ -1300,7 +1439,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d | |||
1300 | } | 1439 | } |
1301 | 1440 | ||
1302 | struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, | 1441 | struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, |
1303 | struct net_device *dev) | 1442 | struct net_device *dev, |
1443 | unsigned int pref) | ||
1304 | { | 1444 | { |
1305 | struct in6_rtmsg rtmsg; | 1445 | struct in6_rtmsg rtmsg; |
1306 | 1446 | ||
@@ -1308,7 +1448,8 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, | |||
1308 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | 1448 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; |
1309 | ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); | 1449 | ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); |
1310 | rtmsg.rtmsg_metric = 1024; | 1450 | rtmsg.rtmsg_metric = 1024; |
1311 | rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES; | 1451 | rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | |
1452 | RTF_PREF(pref); | ||
1312 | 1453 | ||
1313 | rtmsg.rtmsg_ifindex = dev->ifindex; | 1454 | rtmsg.rtmsg_ifindex = dev->ifindex; |
1314 | 1455 | ||
@@ -1326,8 +1467,6 @@ restart: | |||
1326 | if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { | 1467 | if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { |
1327 | dst_hold(&rt->u.dst); | 1468 | dst_hold(&rt->u.dst); |
1328 | 1469 | ||
1329 | rt6_reset_dflt_pointer(NULL); | ||
1330 | |||
1331 | read_unlock_bh(&rt6_lock); | 1470 | read_unlock_bh(&rt6_lock); |
1332 | 1471 | ||
1333 | ip6_del_rt(rt, NULL, NULL, NULL); | 1472 | ip6_del_rt(rt, NULL, NULL, NULL); |
@@ -1738,11 +1877,10 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | |||
1738 | /* | 1877 | /* |
1739 | * 2. allocate and initialize walker. | 1878 | * 2. allocate and initialize walker. |
1740 | */ | 1879 | */ |
1741 | w = kmalloc(sizeof(*w), GFP_ATOMIC); | 1880 | w = kzalloc(sizeof(*w), GFP_ATOMIC); |
1742 | if (w == NULL) | 1881 | if (w == NULL) |
1743 | return -ENOMEM; | 1882 | return -ENOMEM; |
1744 | RT6_TRACE("dump<%p", w); | 1883 | RT6_TRACE("dump<%p", w); |
1745 | memset(w, 0, sizeof(*w)); | ||
1746 | w->root = &ip6_routing_table; | 1884 | w->root = &ip6_routing_table; |
1747 | w->func = fib6_dump_node; | 1885 | w->func = fib6_dump_node; |
1748 | w->args = &arg; | 1886 | w->args = &arg; |