diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-09-29 23:33:58 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-01 00:16:06 -0400 |
commit | 0197aa38df2ce550c0bfc96194b07ce6b68af814 (patch) | |
tree | bf7f41be7e701f6e282ed92e8063e69d339554f8 /net/ipv4/route.c | |
parent | 82efee1499a27c06f5afb11b07db384fdb3f7004 (diff) |
ipv4: rcu conversion in ip_route_output_slow
ip_route_output_slow() is now enclosed in an rcu_read_lock() protected
section, so device references no longer need to be taken or released:
lookups use __ip_dev_find() and dev_get_by_index_rcu() instead.
Tested with the IP route cache disabled, under a stress test:
Before patch:
elapsed time :
real 1m38.347s
user 0m11.909s
sys 23m51.501s
Profile:
13788.00 22.7% ip_route_output_slow [kernel]
7875.00 13.0% dst_destroy [kernel]
3925.00 6.5% fib_semantic_match [kernel]
3144.00 5.2% fib_rules_lookup [kernel]
3061.00 5.0% dst_alloc [kernel]
2276.00 3.7% rt_set_nexthop [kernel]
1762.00 2.9% fib_table_lookup [kernel]
1538.00 2.5% _raw_read_lock [kernel]
1358.00 2.2% ip_output [kernel]
After patch:
elapsed time :
real 1m28.808s
user 0m13.245s
sys 20m37.293s
Profile:
10950.00 17.2% ip_route_output_slow [kernel]
10726.00 16.9% dst_destroy [kernel]
5170.00 8.1% fib_semantic_match [kernel]
3937.00 6.2% dst_alloc [kernel]
3635.00 5.7% rt_set_nexthop [kernel]
2900.00 4.6% fib_rules_lookup [kernel]
2240.00 3.5% fib_table_lookup [kernel]
1427.00 2.2% _raw_read_lock [kernel]
1157.00 1.8% kmem_cache_alloc [kernel]
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 38 |
1 files changed, 12 insertions, 26 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ea895004caf3..a61acea975f1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -2487,6 +2487,7 @@ static int ip_mkroute_output(struct rtable **rp, | |||
2487 | 2487 | ||
2488 | /* | 2488 | /* |
2489 | * Major route resolver routine. | 2489 | * Major route resolver routine. |
2490 | * called with rcu_read_lock(); | ||
2490 | */ | 2491 | */ |
2491 | 2492 | ||
2492 | static int ip_route_output_slow(struct net *net, struct rtable **rp, | 2493 | static int ip_route_output_slow(struct net *net, struct rtable **rp, |
@@ -2505,7 +2506,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2505 | .iif = net->loopback_dev->ifindex, | 2506 | .iif = net->loopback_dev->ifindex, |
2506 | .oif = oldflp->oif }; | 2507 | .oif = oldflp->oif }; |
2507 | struct fib_result res; | 2508 | struct fib_result res; |
2508 | unsigned flags = 0; | 2509 | unsigned int flags = 0; |
2509 | struct net_device *dev_out = NULL; | 2510 | struct net_device *dev_out = NULL; |
2510 | int free_res = 0; | 2511 | int free_res = 0; |
2511 | int err; | 2512 | int err; |
@@ -2535,7 +2536,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2535 | (ipv4_is_multicast(oldflp->fl4_dst) || | 2536 | (ipv4_is_multicast(oldflp->fl4_dst) || |
2536 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { | 2537 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { |
2537 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2538 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2538 | dev_out = ip_dev_find(net, oldflp->fl4_src); | 2539 | dev_out = __ip_dev_find(net, oldflp->fl4_src, false); |
2539 | if (dev_out == NULL) | 2540 | if (dev_out == NULL) |
2540 | goto out; | 2541 | goto out; |
2541 | 2542 | ||
@@ -2560,26 +2561,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2560 | 2561 | ||
2561 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { | 2562 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { |
2562 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2563 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2563 | dev_out = ip_dev_find(net, oldflp->fl4_src); | 2564 | if (!__ip_dev_find(net, oldflp->fl4_src, false)) |
2564 | if (dev_out == NULL) | ||
2565 | goto out; | 2565 | goto out; |
2566 | dev_put(dev_out); | ||
2567 | dev_out = NULL; | ||
2568 | } | 2566 | } |
2569 | } | 2567 | } |
2570 | 2568 | ||
2571 | 2569 | ||
2572 | if (oldflp->oif) { | 2570 | if (oldflp->oif) { |
2573 | dev_out = dev_get_by_index(net, oldflp->oif); | 2571 | dev_out = dev_get_by_index_rcu(net, oldflp->oif); |
2574 | err = -ENODEV; | 2572 | err = -ENODEV; |
2575 | if (dev_out == NULL) | 2573 | if (dev_out == NULL) |
2576 | goto out; | 2574 | goto out; |
2577 | 2575 | ||
2578 | /* RACE: Check return value of inet_select_addr instead. */ | 2576 | /* RACE: Check return value of inet_select_addr instead. */ |
2579 | if (rcu_dereference_raw(dev_out->ip_ptr) == NULL) { | 2577 | if (rcu_dereference(dev_out->ip_ptr) == NULL) |
2580 | dev_put(dev_out); | ||
2581 | goto out; /* Wrong error code */ | 2578 | goto out; /* Wrong error code */ |
2582 | } | ||
2583 | 2579 | ||
2584 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || | 2580 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || |
2585 | oldflp->fl4_dst == htonl(0xFFFFFFFF)) { | 2581 | oldflp->fl4_dst == htonl(0xFFFFFFFF)) { |
@@ -2602,10 +2598,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2602 | fl.fl4_dst = fl.fl4_src; | 2598 | fl.fl4_dst = fl.fl4_src; |
2603 | if (!fl.fl4_dst) | 2599 | if (!fl.fl4_dst) |
2604 | fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); | 2600 | fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); |
2605 | if (dev_out) | ||
2606 | dev_put(dev_out); | ||
2607 | dev_out = net->loopback_dev; | 2601 | dev_out = net->loopback_dev; |
2608 | dev_hold(dev_out); | ||
2609 | fl.oif = net->loopback_dev->ifindex; | 2602 | fl.oif = net->loopback_dev->ifindex; |
2610 | res.type = RTN_LOCAL; | 2603 | res.type = RTN_LOCAL; |
2611 | flags |= RTCF_LOCAL; | 2604 | flags |= RTCF_LOCAL; |
@@ -2639,8 +2632,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2639 | res.type = RTN_UNICAST; | 2632 | res.type = RTN_UNICAST; |
2640 | goto make_route; | 2633 | goto make_route; |
2641 | } | 2634 | } |
2642 | if (dev_out) | ||
2643 | dev_put(dev_out); | ||
2644 | err = -ENETUNREACH; | 2635 | err = -ENETUNREACH; |
2645 | goto out; | 2636 | goto out; |
2646 | } | 2637 | } |
@@ -2649,10 +2640,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2649 | if (res.type == RTN_LOCAL) { | 2640 | if (res.type == RTN_LOCAL) { |
2650 | if (!fl.fl4_src) | 2641 | if (!fl.fl4_src) |
2651 | fl.fl4_src = fl.fl4_dst; | 2642 | fl.fl4_src = fl.fl4_dst; |
2652 | if (dev_out) | ||
2653 | dev_put(dev_out); | ||
2654 | dev_out = net->loopback_dev; | 2643 | dev_out = net->loopback_dev; |
2655 | dev_hold(dev_out); | ||
2656 | fl.oif = dev_out->ifindex; | 2644 | fl.oif = dev_out->ifindex; |
2657 | if (res.fi) | 2645 | if (res.fi) |
2658 | fib_info_put(res.fi); | 2646 | fib_info_put(res.fi); |
@@ -2672,28 +2660,23 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2672 | if (!fl.fl4_src) | 2660 | if (!fl.fl4_src) |
2673 | fl.fl4_src = FIB_RES_PREFSRC(res); | 2661 | fl.fl4_src = FIB_RES_PREFSRC(res); |
2674 | 2662 | ||
2675 | if (dev_out) | ||
2676 | dev_put(dev_out); | ||
2677 | dev_out = FIB_RES_DEV(res); | 2663 | dev_out = FIB_RES_DEV(res); |
2678 | dev_hold(dev_out); | ||
2679 | fl.oif = dev_out->ifindex; | 2664 | fl.oif = dev_out->ifindex; |
2680 | 2665 | ||
2681 | 2666 | ||
2682 | make_route: | 2667 | make_route: |
2683 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2668 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); |
2684 | 2669 | ||
2685 | |||
2686 | if (free_res) | 2670 | if (free_res) |
2687 | fib_res_put(&res); | 2671 | fib_res_put(&res); |
2688 | if (dev_out) | ||
2689 | dev_put(dev_out); | ||
2690 | out: return err; | 2672 | out: return err; |
2691 | } | 2673 | } |
2692 | 2674 | ||
2693 | int __ip_route_output_key(struct net *net, struct rtable **rp, | 2675 | int __ip_route_output_key(struct net *net, struct rtable **rp, |
2694 | const struct flowi *flp) | 2676 | const struct flowi *flp) |
2695 | { | 2677 | { |
2696 | unsigned hash; | 2678 | unsigned int hash; |
2679 | int res; | ||
2697 | struct rtable *rth; | 2680 | struct rtable *rth; |
2698 | 2681 | ||
2699 | if (!rt_caching(net)) | 2682 | if (!rt_caching(net)) |
@@ -2724,7 +2707,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2724 | rcu_read_unlock_bh(); | 2707 | rcu_read_unlock_bh(); |
2725 | 2708 | ||
2726 | slow_output: | 2709 | slow_output: |
2727 | return ip_route_output_slow(net, rp, flp); | 2710 | rcu_read_lock(); |
2711 | res = ip_route_output_slow(net, rp, flp); | ||
2712 | rcu_read_unlock(); | ||
2713 | return res; | ||
2728 | } | 2714 | } |
2729 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2715 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2730 | 2716 | ||