diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 144 |
1 files changed, 101 insertions, 43 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d35bbf0cf404..a9a54a236832 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt) | |||
565 | 565 | ||
566 | static DEFINE_SPINLOCK(fnhe_lock); | 566 | static DEFINE_SPINLOCK(fnhe_lock); |
567 | 567 | ||
568 | static void fnhe_flush_routes(struct fib_nh_exception *fnhe) | ||
569 | { | ||
570 | struct rtable *rt; | ||
571 | |||
572 | rt = rcu_dereference(fnhe->fnhe_rth_input); | ||
573 | if (rt) { | ||
574 | RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); | ||
575 | rt_free(rt); | ||
576 | } | ||
577 | rt = rcu_dereference(fnhe->fnhe_rth_output); | ||
578 | if (rt) { | ||
579 | RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); | ||
580 | rt_free(rt); | ||
581 | } | ||
582 | } | ||
583 | |||
568 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | 584 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) |
569 | { | 585 | { |
570 | struct fib_nh_exception *fnhe, *oldest; | 586 | struct fib_nh_exception *fnhe, *oldest; |
571 | struct rtable *orig; | ||
572 | 587 | ||
573 | oldest = rcu_dereference(hash->chain); | 588 | oldest = rcu_dereference(hash->chain); |
574 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; | 589 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; |
@@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | |||
576 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) | 591 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) |
577 | oldest = fnhe; | 592 | oldest = fnhe; |
578 | } | 593 | } |
579 | orig = rcu_dereference(oldest->fnhe_rth); | 594 | fnhe_flush_routes(oldest); |
580 | if (orig) { | ||
581 | RCU_INIT_POINTER(oldest->fnhe_rth, NULL); | ||
582 | rt_free(orig); | ||
583 | } | ||
584 | return oldest; | 595 | return oldest; |
585 | } | 596 | } |
586 | 597 | ||
@@ -594,11 +605,25 @@ static inline u32 fnhe_hashfun(__be32 daddr) | |||
594 | return hval & (FNHE_HASH_SIZE - 1); | 605 | return hval & (FNHE_HASH_SIZE - 1); |
595 | } | 606 | } |
596 | 607 | ||
608 | static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) | ||
609 | { | ||
610 | rt->rt_pmtu = fnhe->fnhe_pmtu; | ||
611 | rt->dst.expires = fnhe->fnhe_expires; | ||
612 | |||
613 | if (fnhe->fnhe_gw) { | ||
614 | rt->rt_flags |= RTCF_REDIRECTED; | ||
615 | rt->rt_gateway = fnhe->fnhe_gw; | ||
616 | rt->rt_uses_gateway = 1; | ||
617 | } | ||
618 | } | ||
619 | |||
597 | static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | 620 | static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, |
598 | u32 pmtu, unsigned long expires) | 621 | u32 pmtu, unsigned long expires) |
599 | { | 622 | { |
600 | struct fnhe_hash_bucket *hash; | 623 | struct fnhe_hash_bucket *hash; |
601 | struct fib_nh_exception *fnhe; | 624 | struct fib_nh_exception *fnhe; |
625 | struct rtable *rt; | ||
626 | unsigned int i; | ||
602 | int depth; | 627 | int depth; |
603 | u32 hval = fnhe_hashfun(daddr); | 628 | u32 hval = fnhe_hashfun(daddr); |
604 | 629 | ||
@@ -627,8 +652,15 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
627 | fnhe->fnhe_gw = gw; | 652 | fnhe->fnhe_gw = gw; |
628 | if (pmtu) { | 653 | if (pmtu) { |
629 | fnhe->fnhe_pmtu = pmtu; | 654 | fnhe->fnhe_pmtu = pmtu; |
630 | fnhe->fnhe_expires = expires; | 655 | fnhe->fnhe_expires = max(1UL, expires); |
631 | } | 656 | } |
657 | /* Update all cached dsts too */ | ||
658 | rt = rcu_dereference(fnhe->fnhe_rth_input); | ||
659 | if (rt) | ||
660 | fill_route_from_fnhe(rt, fnhe); | ||
661 | rt = rcu_dereference(fnhe->fnhe_rth_output); | ||
662 | if (rt) | ||
663 | fill_route_from_fnhe(rt, fnhe); | ||
632 | } else { | 664 | } else { |
633 | if (depth > FNHE_RECLAIM_DEPTH) | 665 | if (depth > FNHE_RECLAIM_DEPTH) |
634 | fnhe = fnhe_oldest(hash); | 666 | fnhe = fnhe_oldest(hash); |
@@ -640,10 +672,27 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
640 | fnhe->fnhe_next = hash->chain; | 672 | fnhe->fnhe_next = hash->chain; |
641 | rcu_assign_pointer(hash->chain, fnhe); | 673 | rcu_assign_pointer(hash->chain, fnhe); |
642 | } | 674 | } |
675 | fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); | ||
643 | fnhe->fnhe_daddr = daddr; | 676 | fnhe->fnhe_daddr = daddr; |
644 | fnhe->fnhe_gw = gw; | 677 | fnhe->fnhe_gw = gw; |
645 | fnhe->fnhe_pmtu = pmtu; | 678 | fnhe->fnhe_pmtu = pmtu; |
646 | fnhe->fnhe_expires = expires; | 679 | fnhe->fnhe_expires = expires; |
680 | |||
681 | /* Exception created; mark the cached routes for the nexthop | ||
682 | * stale, so anyone caching it rechecks if this exception | ||
683 | * applies to them. | ||
684 | */ | ||
685 | rt = rcu_dereference(nh->nh_rth_input); | ||
686 | if (rt) | ||
687 | rt->dst.obsolete = DST_OBSOLETE_KILL; | ||
688 | |||
689 | for_each_possible_cpu(i) { | ||
690 | struct rtable __rcu **prt; | ||
691 | prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); | ||
692 | rt = rcu_dereference(*prt); | ||
693 | if (rt) | ||
694 | rt->dst.obsolete = DST_OBSOLETE_KILL; | ||
695 | } | ||
647 | } | 696 | } |
648 | 697 | ||
649 | fnhe->fnhe_stamp = jiffies; | 698 | fnhe->fnhe_stamp = jiffies; |
@@ -922,12 +971,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
922 | if (mtu < ip_rt_min_pmtu) | 971 | if (mtu < ip_rt_min_pmtu) |
923 | mtu = ip_rt_min_pmtu; | 972 | mtu = ip_rt_min_pmtu; |
924 | 973 | ||
925 | if (!rt->rt_pmtu) { | 974 | if (rt->rt_pmtu == mtu && |
926 | dst->obsolete = DST_OBSOLETE_KILL; | 975 | time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) |
927 | } else { | 976 | return; |
928 | rt->rt_pmtu = mtu; | ||
929 | dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); | ||
930 | } | ||
931 | 977 | ||
932 | rcu_read_lock(); | 978 | rcu_read_lock(); |
933 | if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { | 979 | if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { |
@@ -1068,11 +1114,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | |||
1068 | * DST_OBSOLETE_FORCE_CHK which forces validation calls down | 1114 | * DST_OBSOLETE_FORCE_CHK which forces validation calls down |
1069 | * into this function always. | 1115 | * into this function always. |
1070 | * | 1116 | * |
1071 | * When a PMTU/redirect information update invalidates a | 1117 | * When a PMTU/redirect information update invalidates a route, |
1072 | * route, this is indicated by setting obsolete to | 1118 | * this is indicated by setting obsolete to DST_OBSOLETE_KILL or |
1073 | * DST_OBSOLETE_KILL. | 1119 | * DST_OBSOLETE_DEAD by dst_free(). |
1074 | */ | 1120 | */ |
1075 | if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) | 1121 | if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) |
1076 | return NULL; | 1122 | return NULL; |
1077 | return dst; | 1123 | return dst; |
1078 | } | 1124 | } |
@@ -1214,34 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | |||
1214 | spin_lock_bh(&fnhe_lock); | 1260 | spin_lock_bh(&fnhe_lock); |
1215 | 1261 | ||
1216 | if (daddr == fnhe->fnhe_daddr) { | 1262 | if (daddr == fnhe->fnhe_daddr) { |
1217 | struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); | 1263 | struct rtable __rcu **porig; |
1218 | if (orig && rt_is_expired(orig)) { | 1264 | struct rtable *orig; |
1265 | int genid = fnhe_genid(dev_net(rt->dst.dev)); | ||
1266 | |||
1267 | if (rt_is_input_route(rt)) | ||
1268 | porig = &fnhe->fnhe_rth_input; | ||
1269 | else | ||
1270 | porig = &fnhe->fnhe_rth_output; | ||
1271 | orig = rcu_dereference(*porig); | ||
1272 | |||
1273 | if (fnhe->fnhe_genid != genid) { | ||
1274 | fnhe->fnhe_genid = genid; | ||
1219 | fnhe->fnhe_gw = 0; | 1275 | fnhe->fnhe_gw = 0; |
1220 | fnhe->fnhe_pmtu = 0; | 1276 | fnhe->fnhe_pmtu = 0; |
1221 | fnhe->fnhe_expires = 0; | 1277 | fnhe->fnhe_expires = 0; |
1278 | fnhe_flush_routes(fnhe); | ||
1279 | orig = NULL; | ||
1222 | } | 1280 | } |
1223 | if (fnhe->fnhe_pmtu) { | 1281 | fill_route_from_fnhe(rt, fnhe); |
1224 | unsigned long expires = fnhe->fnhe_expires; | 1282 | if (!rt->rt_gateway) |
1225 | unsigned long diff = expires - jiffies; | ||
1226 | |||
1227 | if (time_before(jiffies, expires)) { | ||
1228 | rt->rt_pmtu = fnhe->fnhe_pmtu; | ||
1229 | dst_set_expires(&rt->dst, diff); | ||
1230 | } | ||
1231 | } | ||
1232 | if (fnhe->fnhe_gw) { | ||
1233 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1234 | rt->rt_gateway = fnhe->fnhe_gw; | ||
1235 | rt->rt_uses_gateway = 1; | ||
1236 | } else if (!rt->rt_gateway) | ||
1237 | rt->rt_gateway = daddr; | 1283 | rt->rt_gateway = daddr; |
1238 | 1284 | ||
1239 | rcu_assign_pointer(fnhe->fnhe_rth, rt); | 1285 | if (!(rt->dst.flags & DST_NOCACHE)) { |
1240 | if (orig) | 1286 | rcu_assign_pointer(*porig, rt); |
1241 | rt_free(orig); | 1287 | if (orig) |
1288 | rt_free(orig); | ||
1289 | ret = true; | ||
1290 | } | ||
1242 | 1291 | ||
1243 | fnhe->fnhe_stamp = jiffies; | 1292 | fnhe->fnhe_stamp = jiffies; |
1244 | ret = true; | ||
1245 | } | 1293 | } |
1246 | spin_unlock_bh(&fnhe_lock); | 1294 | spin_unlock_bh(&fnhe_lock); |
1247 | 1295 | ||
@@ -1473,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1473 | struct in_device *in_dev, | 1521 | struct in_device *in_dev, |
1474 | __be32 daddr, __be32 saddr, u32 tos) | 1522 | __be32 daddr, __be32 saddr, u32 tos) |
1475 | { | 1523 | { |
1524 | struct fib_nh_exception *fnhe; | ||
1476 | struct rtable *rth; | 1525 | struct rtable *rth; |
1477 | int err; | 1526 | int err; |
1478 | struct in_device *out_dev; | 1527 | struct in_device *out_dev; |
@@ -1519,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1519 | } | 1568 | } |
1520 | } | 1569 | } |
1521 | 1570 | ||
1571 | fnhe = find_exception(&FIB_RES_NH(*res), daddr); | ||
1522 | if (do_cache) { | 1572 | if (do_cache) { |
1523 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); | 1573 | if (fnhe != NULL) |
1574 | rth = rcu_dereference(fnhe->fnhe_rth_input); | ||
1575 | else | ||
1576 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); | ||
1577 | |||
1524 | if (rt_cache_valid(rth)) { | 1578 | if (rt_cache_valid(rth)) { |
1525 | skb_dst_set_noref(skb, &rth->dst); | 1579 | skb_dst_set_noref(skb, &rth->dst); |
1526 | goto out; | 1580 | goto out; |
@@ -1548,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1548 | rth->dst.input = ip_forward; | 1602 | rth->dst.input = ip_forward; |
1549 | rth->dst.output = ip_output; | 1603 | rth->dst.output = ip_output; |
1550 | 1604 | ||
1551 | rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); | 1605 | rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); |
1552 | skb_dst_set(skb, &rth->dst); | 1606 | skb_dst_set(skb, &rth->dst); |
1553 | out: | 1607 | out: |
1554 | err = 0; | 1608 | err = 0; |
@@ -1863,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1863 | 1917 | ||
1864 | fnhe = find_exception(nh, fl4->daddr); | 1918 | fnhe = find_exception(nh, fl4->daddr); |
1865 | if (fnhe) | 1919 | if (fnhe) |
1866 | prth = &fnhe->fnhe_rth; | 1920 | prth = &fnhe->fnhe_rth_output; |
1867 | else { | 1921 | else { |
1868 | if (unlikely(fl4->flowi4_flags & | 1922 | if (unlikely(fl4->flowi4_flags & |
1869 | FLOWI_FLAG_KNOWN_NH && | 1923 | FLOWI_FLAG_KNOWN_NH && |
@@ -2429,19 +2483,22 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ; | |||
2429 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; | 2483 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; |
2430 | static int ip_rt_gc_elasticity __read_mostly = 8; | 2484 | static int ip_rt_gc_elasticity __read_mostly = 8; |
2431 | 2485 | ||
2432 | static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | 2486 | static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, |
2433 | void __user *buffer, | 2487 | void __user *buffer, |
2434 | size_t *lenp, loff_t *ppos) | 2488 | size_t *lenp, loff_t *ppos) |
2435 | { | 2489 | { |
2490 | struct net *net = (struct net *)__ctl->extra1; | ||
2491 | |||
2436 | if (write) { | 2492 | if (write) { |
2437 | rt_cache_flush((struct net *)__ctl->extra1); | 2493 | rt_cache_flush(net); |
2494 | fnhe_genid_bump(net); | ||
2438 | return 0; | 2495 | return 0; |
2439 | } | 2496 | } |
2440 | 2497 | ||
2441 | return -EINVAL; | 2498 | return -EINVAL; |
2442 | } | 2499 | } |
2443 | 2500 | ||
2444 | static ctl_table ipv4_route_table[] = { | 2501 | static struct ctl_table ipv4_route_table[] = { |
2445 | { | 2502 | { |
2446 | .procname = "gc_thresh", | 2503 | .procname = "gc_thresh", |
2447 | .data = &ipv4_dst_ops.gc_thresh, | 2504 | .data = &ipv4_dst_ops.gc_thresh, |
@@ -2609,6 +2666,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { | |||
2609 | static __net_init int rt_genid_init(struct net *net) | 2666 | static __net_init int rt_genid_init(struct net *net) |
2610 | { | 2667 | { |
2611 | atomic_set(&net->rt_genid, 0); | 2668 | atomic_set(&net->rt_genid, 0); |
2669 | atomic_set(&net->fnhe_genid, 0); | ||
2612 | get_random_bytes(&net->ipv4.dev_addr_genid, | 2670 | get_random_bytes(&net->ipv4.dev_addr_genid, |
2613 | sizeof(net->ipv4.dev_addr_genid)); | 2671 | sizeof(net->ipv4.dev_addr_genid)); |
2614 | return 0; | 2672 | return 0; |