aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-09-29 23:33:58 -0400
committer: David S. Miller <davem@davemloft.net> 2010-10-01 00:16:06 -0400
commit: 0197aa38df2ce550c0bfc96194b07ce6b68af814 (patch)
tree: bf7f41be7e701f6e282ed92e8063e69d339554f8 /net/ipv4/route.c
parent: 82efee1499a27c06f5afb11b07db384fdb3f7004 (diff)
ipv4: rcu conversion in ip_route_output_slow
ip_route_output_slow() is enclosed in an rcu_read_lock() protected section, so that no references are taken/released on device, thanks to __ip_dev_find() & dev_get_by_index_rcu().

Tested with ip route cache disabled, and a stress test:

Before patch:

elapsed time:
real 1m38.347s
user 0m11.909s
sys 23m51.501s

Profile:
13788.00 22.7% ip_route_output_slow [kernel]
 7875.00 13.0% dst_destroy [kernel]
 3925.00  6.5% fib_semantic_match [kernel]
 3144.00  5.2% fib_rules_lookup [kernel]
 3061.00  5.0% dst_alloc [kernel]
 2276.00  3.7% rt_set_nexthop [kernel]
 1762.00  2.9% fib_table_lookup [kernel]
 1538.00  2.5% _raw_read_lock [kernel]
 1358.00  2.2% ip_output [kernel]

After patch:

real 1m28.808s
user 0m13.245s
sys 20m37.293s

10950.00 17.2% ip_route_output_slow [kernel]
10726.00 16.9% dst_destroy [kernel]
 5170.00  8.1% fib_semantic_match [kernel]
 3937.00  6.2% dst_alloc [kernel]
 3635.00  5.7% rt_set_nexthop [kernel]
 2900.00  4.6% fib_rules_lookup [kernel]
 2240.00  3.5% fib_table_lookup [kernel]
 1427.00  2.2% _raw_read_lock [kernel]
 1157.00  1.8% kmem_cache_alloc [kernel]

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- net/ipv4/route.c 38
1 files changed, 12 insertions, 26 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ea895004caf3..a61acea975f1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2487,6 +2487,7 @@ static int ip_mkroute_output(struct rtable **rp,
2487 2487
2488/* 2488/*
2489 * Major route resolver routine. 2489 * Major route resolver routine.
2490 * called with rcu_read_lock();
2490 */ 2491 */
2491 2492
2492static int ip_route_output_slow(struct net *net, struct rtable **rp, 2493static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2505,7 +2506,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2505 .iif = net->loopback_dev->ifindex, 2506 .iif = net->loopback_dev->ifindex,
2506 .oif = oldflp->oif }; 2507 .oif = oldflp->oif };
2507 struct fib_result res; 2508 struct fib_result res;
2508 unsigned flags = 0; 2509 unsigned int flags = 0;
2509 struct net_device *dev_out = NULL; 2510 struct net_device *dev_out = NULL;
2510 int free_res = 0; 2511 int free_res = 0;
2511 int err; 2512 int err;
@@ -2535,7 +2536,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2535 (ipv4_is_multicast(oldflp->fl4_dst) || 2536 (ipv4_is_multicast(oldflp->fl4_dst) ||
2536 oldflp->fl4_dst == htonl(0xFFFFFFFF))) { 2537 oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
2537 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2538 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2538 dev_out = ip_dev_find(net, oldflp->fl4_src); 2539 dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
2539 if (dev_out == NULL) 2540 if (dev_out == NULL)
2540 goto out; 2541 goto out;
2541 2542
@@ -2560,26 +2561,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2560 2561
2561 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { 2562 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
2562 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2563 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2563 dev_out = ip_dev_find(net, oldflp->fl4_src); 2564 if (!__ip_dev_find(net, oldflp->fl4_src, false))
2564 if (dev_out == NULL)
2565 goto out; 2565 goto out;
2566 dev_put(dev_out);
2567 dev_out = NULL;
2568 } 2566 }
2569 } 2567 }
2570 2568
2571 2569
2572 if (oldflp->oif) { 2570 if (oldflp->oif) {
2573 dev_out = dev_get_by_index(net, oldflp->oif); 2571 dev_out = dev_get_by_index_rcu(net, oldflp->oif);
2574 err = -ENODEV; 2572 err = -ENODEV;
2575 if (dev_out == NULL) 2573 if (dev_out == NULL)
2576 goto out; 2574 goto out;
2577 2575
2578 /* RACE: Check return value of inet_select_addr instead. */ 2576 /* RACE: Check return value of inet_select_addr instead. */
2579 if (rcu_dereference_raw(dev_out->ip_ptr) == NULL) { 2577 if (rcu_dereference(dev_out->ip_ptr) == NULL)
2580 dev_put(dev_out);
2581 goto out; /* Wrong error code */ 2578 goto out; /* Wrong error code */
2582 }
2583 2579
2584 if (ipv4_is_local_multicast(oldflp->fl4_dst) || 2580 if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
2585 oldflp->fl4_dst == htonl(0xFFFFFFFF)) { 2581 oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
@@ -2602,10 +2598,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2602 fl.fl4_dst = fl.fl4_src; 2598 fl.fl4_dst = fl.fl4_src;
2603 if (!fl.fl4_dst) 2599 if (!fl.fl4_dst)
2604 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2600 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
2605 if (dev_out)
2606 dev_put(dev_out);
2607 dev_out = net->loopback_dev; 2601 dev_out = net->loopback_dev;
2608 dev_hold(dev_out);
2609 fl.oif = net->loopback_dev->ifindex; 2602 fl.oif = net->loopback_dev->ifindex;
2610 res.type = RTN_LOCAL; 2603 res.type = RTN_LOCAL;
2611 flags |= RTCF_LOCAL; 2604 flags |= RTCF_LOCAL;
@@ -2639,8 +2632,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2639 res.type = RTN_UNICAST; 2632 res.type = RTN_UNICAST;
2640 goto make_route; 2633 goto make_route;
2641 } 2634 }
2642 if (dev_out)
2643 dev_put(dev_out);
2644 err = -ENETUNREACH; 2635 err = -ENETUNREACH;
2645 goto out; 2636 goto out;
2646 } 2637 }
@@ -2649,10 +2640,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2649 if (res.type == RTN_LOCAL) { 2640 if (res.type == RTN_LOCAL) {
2650 if (!fl.fl4_src) 2641 if (!fl.fl4_src)
2651 fl.fl4_src = fl.fl4_dst; 2642 fl.fl4_src = fl.fl4_dst;
2652 if (dev_out)
2653 dev_put(dev_out);
2654 dev_out = net->loopback_dev; 2643 dev_out = net->loopback_dev;
2655 dev_hold(dev_out);
2656 fl.oif = dev_out->ifindex; 2644 fl.oif = dev_out->ifindex;
2657 if (res.fi) 2645 if (res.fi)
2658 fib_info_put(res.fi); 2646 fib_info_put(res.fi);
@@ -2672,28 +2660,23 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2672 if (!fl.fl4_src) 2660 if (!fl.fl4_src)
2673 fl.fl4_src = FIB_RES_PREFSRC(res); 2661 fl.fl4_src = FIB_RES_PREFSRC(res);
2674 2662
2675 if (dev_out)
2676 dev_put(dev_out);
2677 dev_out = FIB_RES_DEV(res); 2663 dev_out = FIB_RES_DEV(res);
2678 dev_hold(dev_out);
2679 fl.oif = dev_out->ifindex; 2664 fl.oif = dev_out->ifindex;
2680 2665
2681 2666
2682make_route: 2667make_route:
2683 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2668 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2684 2669
2685
2686 if (free_res) 2670 if (free_res)
2687 fib_res_put(&res); 2671 fib_res_put(&res);
2688 if (dev_out)
2689 dev_put(dev_out);
2690out: return err; 2672out: return err;
2691} 2673}
2692 2674
2693int __ip_route_output_key(struct net *net, struct rtable **rp, 2675int __ip_route_output_key(struct net *net, struct rtable **rp,
2694 const struct flowi *flp) 2676 const struct flowi *flp)
2695{ 2677{
2696 unsigned hash; 2678 unsigned int hash;
2679 int res;
2697 struct rtable *rth; 2680 struct rtable *rth;
2698 2681
2699 if (!rt_caching(net)) 2682 if (!rt_caching(net))
@@ -2724,7 +2707,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2724 rcu_read_unlock_bh(); 2707 rcu_read_unlock_bh();
2725 2708
2726slow_output: 2709slow_output:
2727 return ip_route_output_slow(net, rp, flp); 2710 rcu_read_lock();
2711 res = ip_route_output_slow(net, rp, flp);
2712 rcu_read_unlock();
2713 return res;
2728} 2714}
2729EXPORT_SYMBOL_GPL(__ip_route_output_key); 2715EXPORT_SYMBOL_GPL(__ip_route_output_key);
2730 2716