aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c144
1 files changed, 101 insertions, 43 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d35bbf0cf404..a9a54a236832 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt)
565 565
566static DEFINE_SPINLOCK(fnhe_lock); 566static DEFINE_SPINLOCK(fnhe_lock);
567 567
/* Drop the routes cached on a nexthop exception.
 *
 * Handles the input slot first, then the output slot.  For each slot that
 * holds a route, the slot is reset to NULL and the old route is handed to
 * rt_free().  Slots that are already empty are left alone.
 *
 * NOTE(review): the rt_bind_exception() call site visible in this diff
 * invokes this with fnhe_lock held -- confirm the same holds for every
 * other caller.
 */
568static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
569{
570 struct rtable *rt;
571
572 rt = rcu_dereference(fnhe->fnhe_rth_input);
573 if (rt) {
/* Unpublish the pointer before releasing the route it referred to. */
574 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
575 rt_free(rt);
576 }
/* Same treatment for the cached output route. */
577 rt = rcu_dereference(fnhe->fnhe_rth_output);
578 if (rt) {
579 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
580 rt_free(rt);
581 }
582}
583
568static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 584static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
569{ 585{
570 struct fib_nh_exception *fnhe, *oldest; 586 struct fib_nh_exception *fnhe, *oldest;
571 struct rtable *orig;
572 587
573 oldest = rcu_dereference(hash->chain); 588 oldest = rcu_dereference(hash->chain);
574 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 589 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
576 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 591 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
577 oldest = fnhe; 592 oldest = fnhe;
578 } 593 }
579 orig = rcu_dereference(oldest->fnhe_rth); 594 fnhe_flush_routes(oldest);
580 if (orig) {
581 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
582 rt_free(orig);
583 }
584 return oldest; 595 return oldest;
585} 596}
586 597
@@ -594,11 +605,25 @@ static inline u32 fnhe_hashfun(__be32 daddr)
594 return hval & (FNHE_HASH_SIZE - 1); 605 return hval & (FNHE_HASH_SIZE - 1);
595} 606}
596 607
/* Copy the state recorded in a nexthop exception onto a route.
 *
 * The PMTU and the expiry stamp are always copied as-is (including zero
 * values); no comparison against the current time is made here.  The
 * gateway is applied only when the exception carries one, in which case
 * the route is also flagged RTCF_REDIRECTED and marked as using a gateway.
 */
608static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
609{
610 rt->rt_pmtu = fnhe->fnhe_pmtu;
611 rt->dst.expires = fnhe->fnhe_expires;
612
/* A zero fnhe_gw means no redirect was recorded; keep the route's gateway. */
613 if (fnhe->fnhe_gw) {
614 rt->rt_flags |= RTCF_REDIRECTED;
615 rt->rt_gateway = fnhe->fnhe_gw;
616 rt->rt_uses_gateway = 1;
617 }
618}
619
597static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, 620static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
598 u32 pmtu, unsigned long expires) 621 u32 pmtu, unsigned long expires)
599{ 622{
600 struct fnhe_hash_bucket *hash; 623 struct fnhe_hash_bucket *hash;
601 struct fib_nh_exception *fnhe; 624 struct fib_nh_exception *fnhe;
625 struct rtable *rt;
626 unsigned int i;
602 int depth; 627 int depth;
603 u32 hval = fnhe_hashfun(daddr); 628 u32 hval = fnhe_hashfun(daddr);
604 629
@@ -627,8 +652,15 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
627 fnhe->fnhe_gw = gw; 652 fnhe->fnhe_gw = gw;
628 if (pmtu) { 653 if (pmtu) {
629 fnhe->fnhe_pmtu = pmtu; 654 fnhe->fnhe_pmtu = pmtu;
630 fnhe->fnhe_expires = expires; 655 fnhe->fnhe_expires = max(1UL, expires);
631 } 656 }
657 /* Update all cached dsts too */
658 rt = rcu_dereference(fnhe->fnhe_rth_input);
659 if (rt)
660 fill_route_from_fnhe(rt, fnhe);
661 rt = rcu_dereference(fnhe->fnhe_rth_output);
662 if (rt)
663 fill_route_from_fnhe(rt, fnhe);
632 } else { 664 } else {
633 if (depth > FNHE_RECLAIM_DEPTH) 665 if (depth > FNHE_RECLAIM_DEPTH)
634 fnhe = fnhe_oldest(hash); 666 fnhe = fnhe_oldest(hash);
@@ -640,10 +672,27 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
640 fnhe->fnhe_next = hash->chain; 672 fnhe->fnhe_next = hash->chain;
641 rcu_assign_pointer(hash->chain, fnhe); 673 rcu_assign_pointer(hash->chain, fnhe);
642 } 674 }
675 fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
643 fnhe->fnhe_daddr = daddr; 676 fnhe->fnhe_daddr = daddr;
644 fnhe->fnhe_gw = gw; 677 fnhe->fnhe_gw = gw;
645 fnhe->fnhe_pmtu = pmtu; 678 fnhe->fnhe_pmtu = pmtu;
646 fnhe->fnhe_expires = expires; 679 fnhe->fnhe_expires = expires;
680
681 /* Exception created; mark the cached routes for the nexthop
682 * stale, so anyone caching it rechecks if this exception
683 * applies to them.
684 */
685 rt = rcu_dereference(nh->nh_rth_input);
686 if (rt)
687 rt->dst.obsolete = DST_OBSOLETE_KILL;
688
689 for_each_possible_cpu(i) {
690 struct rtable __rcu **prt;
691 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
692 rt = rcu_dereference(*prt);
693 if (rt)
694 rt->dst.obsolete = DST_OBSOLETE_KILL;
695 }
647 } 696 }
648 697
649 fnhe->fnhe_stamp = jiffies; 698 fnhe->fnhe_stamp = jiffies;
@@ -922,12 +971,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
922 if (mtu < ip_rt_min_pmtu) 971 if (mtu < ip_rt_min_pmtu)
923 mtu = ip_rt_min_pmtu; 972 mtu = ip_rt_min_pmtu;
924 973
925 if (!rt->rt_pmtu) { 974 if (rt->rt_pmtu == mtu &&
926 dst->obsolete = DST_OBSOLETE_KILL; 975 time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
927 } else { 976 return;
928 rt->rt_pmtu = mtu;
929 dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
930 }
931 977
932 rcu_read_lock(); 978 rcu_read_lock();
933 if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { 979 if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
@@ -1068,11 +1114,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1068 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1114 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1069 * into this function always. 1115 * into this function always.
1070 * 1116 *
1071 * When a PMTU/redirect information update invalidates a 1117 * When a PMTU/redirect information update invalidates a route,
1072 * route, this is indicated by setting obsolete to 1118 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1073 * DST_OBSOLETE_KILL. 1119 * DST_OBSOLETE_DEAD by dst_free().
1074 */ 1120 */
1075 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) 1121 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
1076 return NULL; 1122 return NULL;
1077 return dst; 1123 return dst;
1078} 1124}
@@ -1214,34 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1214 spin_lock_bh(&fnhe_lock); 1260 spin_lock_bh(&fnhe_lock);
1215 1261
1216 if (daddr == fnhe->fnhe_daddr) { 1262 if (daddr == fnhe->fnhe_daddr) {
1217 struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); 1263 struct rtable __rcu **porig;
1218 if (orig && rt_is_expired(orig)) { 1264 struct rtable *orig;
1265 int genid = fnhe_genid(dev_net(rt->dst.dev));
1266
1267 if (rt_is_input_route(rt))
1268 porig = &fnhe->fnhe_rth_input;
1269 else
1270 porig = &fnhe->fnhe_rth_output;
1271 orig = rcu_dereference(*porig);
1272
1273 if (fnhe->fnhe_genid != genid) {
1274 fnhe->fnhe_genid = genid;
1219 fnhe->fnhe_gw = 0; 1275 fnhe->fnhe_gw = 0;
1220 fnhe->fnhe_pmtu = 0; 1276 fnhe->fnhe_pmtu = 0;
1221 fnhe->fnhe_expires = 0; 1277 fnhe->fnhe_expires = 0;
1278 fnhe_flush_routes(fnhe);
1279 orig = NULL;
1222 } 1280 }
1223 if (fnhe->fnhe_pmtu) { 1281 fill_route_from_fnhe(rt, fnhe);
1224 unsigned long expires = fnhe->fnhe_expires; 1282 if (!rt->rt_gateway)
1225 unsigned long diff = expires - jiffies;
1226
1227 if (time_before(jiffies, expires)) {
1228 rt->rt_pmtu = fnhe->fnhe_pmtu;
1229 dst_set_expires(&rt->dst, diff);
1230 }
1231 }
1232 if (fnhe->fnhe_gw) {
1233 rt->rt_flags |= RTCF_REDIRECTED;
1234 rt->rt_gateway = fnhe->fnhe_gw;
1235 rt->rt_uses_gateway = 1;
1236 } else if (!rt->rt_gateway)
1237 rt->rt_gateway = daddr; 1283 rt->rt_gateway = daddr;
1238 1284
1239 rcu_assign_pointer(fnhe->fnhe_rth, rt); 1285 if (!(rt->dst.flags & DST_NOCACHE)) {
1240 if (orig) 1286 rcu_assign_pointer(*porig, rt);
1241 rt_free(orig); 1287 if (orig)
1288 rt_free(orig);
1289 ret = true;
1290 }
1242 1291
1243 fnhe->fnhe_stamp = jiffies; 1292 fnhe->fnhe_stamp = jiffies;
1244 ret = true;
1245 } 1293 }
1246 spin_unlock_bh(&fnhe_lock); 1294 spin_unlock_bh(&fnhe_lock);
1247 1295
@@ -1473,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb,
1473 struct in_device *in_dev, 1521 struct in_device *in_dev,
1474 __be32 daddr, __be32 saddr, u32 tos) 1522 __be32 daddr, __be32 saddr, u32 tos)
1475{ 1523{
1524 struct fib_nh_exception *fnhe;
1476 struct rtable *rth; 1525 struct rtable *rth;
1477 int err; 1526 int err;
1478 struct in_device *out_dev; 1527 struct in_device *out_dev;
@@ -1519,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb,
1519 } 1568 }
1520 } 1569 }
1521 1570
1571 fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1522 if (do_cache) { 1572 if (do_cache) {
1523 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); 1573 if (fnhe != NULL)
1574 rth = rcu_dereference(fnhe->fnhe_rth_input);
1575 else
1576 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1577
1524 if (rt_cache_valid(rth)) { 1578 if (rt_cache_valid(rth)) {
1525 skb_dst_set_noref(skb, &rth->dst); 1579 skb_dst_set_noref(skb, &rth->dst);
1526 goto out; 1580 goto out;
@@ -1548,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb,
1548 rth->dst.input = ip_forward; 1602 rth->dst.input = ip_forward;
1549 rth->dst.output = ip_output; 1603 rth->dst.output = ip_output;
1550 1604
1551 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); 1605 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
1552 skb_dst_set(skb, &rth->dst); 1606 skb_dst_set(skb, &rth->dst);
1553out: 1607out:
1554 err = 0; 1608 err = 0;
@@ -1863,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1863 1917
1864 fnhe = find_exception(nh, fl4->daddr); 1918 fnhe = find_exception(nh, fl4->daddr);
1865 if (fnhe) 1919 if (fnhe)
1866 prth = &fnhe->fnhe_rth; 1920 prth = &fnhe->fnhe_rth_output;
1867 else { 1921 else {
1868 if (unlikely(fl4->flowi4_flags & 1922 if (unlikely(fl4->flowi4_flags &
1869 FLOWI_FLAG_KNOWN_NH && 1923 FLOWI_FLAG_KNOWN_NH &&
@@ -2429,19 +2483,22 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ;
2429static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 2483static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
2430static int ip_rt_gc_elasticity __read_mostly = 8; 2484static int ip_rt_gc_elasticity __read_mostly = 8;
2431 2485
2432static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, 2486static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
2433 void __user *buffer, 2487 void __user *buffer,
2434 size_t *lenp, loff_t *ppos) 2488 size_t *lenp, loff_t *ppos)
2435{ 2489{
2490 struct net *net = (struct net *)__ctl->extra1;
2491
2436 if (write) { 2492 if (write) {
2437 rt_cache_flush((struct net *)__ctl->extra1); 2493 rt_cache_flush(net);
2494 fnhe_genid_bump(net);
2438 return 0; 2495 return 0;
2439 } 2496 }
2440 2497
2441 return -EINVAL; 2498 return -EINVAL;
2442} 2499}
2443 2500
2444static ctl_table ipv4_route_table[] = { 2501static struct ctl_table ipv4_route_table[] = {
2445 { 2502 {
2446 .procname = "gc_thresh", 2503 .procname = "gc_thresh",
2447 .data = &ipv4_dst_ops.gc_thresh, 2504 .data = &ipv4_dst_ops.gc_thresh,
@@ -2609,6 +2666,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
2609static __net_init int rt_genid_init(struct net *net) 2666static __net_init int rt_genid_init(struct net *net)
2610{ 2667{
2611 atomic_set(&net->rt_genid, 0); 2668 atomic_set(&net->rt_genid, 0);
2669 atomic_set(&net->fnhe_genid, 0);
2612 get_random_bytes(&net->ipv4.dev_addr_genid, 2670 get_random_bytes(&net->ipv4.dev_addr_genid,
2613 sizeof(net->ipv4.dev_addr_genid)); 2671 sizeof(net->ipv4.dev_addr_genid));
2614 return 0; 2672 return 0;