aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2010-10-05 06:41:36 -0400
committer David S. Miller <davem@davemloft.net> 2010-10-05 23:39:38 -0400
commit ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 (patch)
tree 395e50547ffccc6b73e04a44190eb4b4f2d2316b /net/ipv4/route.c
parent c2952c314b4fe61820ba8fd6c949eed636140d52 (diff)
fib: RCU conversion of fib_lookup()
fib_lookup() converted to be called in RCU protected context, no
reference taken and released on a contended cache line (fib_clntref)

fib_table_lookup() and fib_semantic_match() get an additional
parameter.

struct fib_info gets an rcu_head field, and is freed after an rcu grace
period.

Stress test :
(Sending 160.000.000 UDP frames on same neighbour,
IP route cache disabled, dual E5540 @2.53GHz,
32bit kernel, FIB_HASH) (about same results for FIB_TRIE)

Before patch :

real 1m31.199s
user 0m13.761s
sys 23m24.780s

After patch:

real 1m5.375s
user 0m14.997s
sys 15m50.115s

Before patch Profile :

13044.00 15.4% __ip_route_output_key vmlinux
8438.00 10.0% dst_destroy vmlinux
5983.00 7.1% fib_semantic_match vmlinux
5410.00 6.4% fib_rules_lookup vmlinux
4803.00 5.7% neigh_lookup vmlinux
4420.00 5.2% _raw_spin_lock vmlinux
3883.00 4.6% rt_set_nexthop vmlinux
3261.00 3.9% _raw_read_lock vmlinux
2794.00 3.3% fib_table_lookup vmlinux
2374.00 2.8% neigh_resolve_output vmlinux
2153.00 2.5% dst_alloc vmlinux
1502.00 1.8% _raw_read_lock_bh vmlinux
1484.00 1.8% kmem_cache_alloc vmlinux
1407.00 1.7% eth_header vmlinux
1406.00 1.7% ipv4_dst_destroy vmlinux
1298.00 1.5% __copy_from_user_ll vmlinux
1174.00 1.4% dev_queue_xmit vmlinux
1000.00 1.2% ip_output vmlinux

After patch Profile :

13712.00 15.8% dst_destroy vmlinux
8548.00 9.9% __ip_route_output_key vmlinux
7017.00 8.1% neigh_lookup vmlinux
4554.00 5.3% fib_semantic_match vmlinux
4067.00 4.7% _raw_read_lock vmlinux
3491.00 4.0% dst_alloc vmlinux
3186.00 3.7% neigh_resolve_output vmlinux
3103.00 3.6% fib_table_lookup vmlinux
2098.00 2.4% _raw_read_lock_bh vmlinux
2081.00 2.4% kmem_cache_alloc vmlinux
2013.00 2.3% _raw_spin_lock vmlinux
1763.00 2.0% __copy_from_user_ll vmlinux
1763.00 2.0% ip_output vmlinux
1761.00 2.0% ipv4_dst_destroy vmlinux
1631.00 1.9% eth_header vmlinux
1440.00 1.7% _raw_read_unlock_bh vmlinux

Reference results, if IP route cache is enabled :

real 0m29.718s
user 0m10.845s
sys 7m37.341s

25213.00 29.5% __ip_route_output_key vmlinux
9011.00 10.5% dst_release vmlinux
4817.00 5.6% ip_push_pending_frames vmlinux
4232.00 5.0% ip_finish_output vmlinux
3940.00 4.6% udp_sendmsg vmlinux
3730.00 4.4% __copy_from_user_ll vmlinux
3716.00 4.4% ip_route_output_flow vmlinux
2451.00 2.9% __xfrm_lookup vmlinux
2221.00 2.6% ip_append_data vmlinux
1718.00 2.0% _raw_spin_lock_bh vmlinux
1655.00 1.9% __alloc_skb vmlinux
1572.00 1.8% sock_wfree vmlinux
1345.00 1.6% kfree vmlinux

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- net/ipv4/route.c | 59
1 files changed, 24 insertions, 35 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 04e0df82b88c..7864d0c48968 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1773,12 +1773,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1773 1773
1774 if (rt->fl.iif == 0) 1774 if (rt->fl.iif == 0)
1775 src = rt->rt_src; 1775 src = rt->rt_src;
1776 else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { 1776 else {
1777 src = FIB_RES_PREFSRC(res); 1777 rcu_read_lock();
1778 fib_res_put(&res); 1778 if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
1779 } else 1779 src = FIB_RES_PREFSRC(res);
1780 src = inet_select_addr(rt->dst.dev, rt->rt_gateway, 1780 else
1781 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1781 RT_SCOPE_UNIVERSE); 1782 RT_SCOPE_UNIVERSE);
1783 rcu_read_unlock();
1784 }
1782 memcpy(addr, &src, 4); 1785 memcpy(addr, &src, 4);
1783} 1786}
1784 1787
@@ -2081,6 +2084,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2081 * Such approach solves two big problems: 2084 * Such approach solves two big problems:
2082 * 1. Not simplex devices are handled properly. 2085 * 1. Not simplex devices are handled properly.
2083 * 2. IP spoofing attempts are filtered with 100% of guarantee. 2086 * 2. IP spoofing attempts are filtered with 100% of guarantee.
2087 * called with rcu_read_lock()
2084 */ 2088 */
2085 2089
2086static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2090static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2102,7 +2106,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2102 unsigned hash; 2106 unsigned hash;
2103 __be32 spec_dst; 2107 __be32 spec_dst;
2104 int err = -EINVAL; 2108 int err = -EINVAL;
2105 int free_res = 0;
2106 struct net * net = dev_net(dev); 2109 struct net * net = dev_net(dev);
2107 2110
2108 /* IP on this device is disabled. */ 2111 /* IP on this device is disabled. */
@@ -2134,12 +2137,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2134 /* 2137 /*
2135 * Now we are ready to route packet. 2138 * Now we are ready to route packet.
2136 */ 2139 */
2137 if ((err = fib_lookup(net, &fl, &res)) != 0) { 2140 err = fib_lookup(net, &fl, &res);
2141 if (err != 0) {
2138 if (!IN_DEV_FORWARD(in_dev)) 2142 if (!IN_DEV_FORWARD(in_dev))
2139 goto e_hostunreach; 2143 goto e_hostunreach;
2140 goto no_route; 2144 goto no_route;
2141 } 2145 }
2142 free_res = 1;
2143 2146
2144 RT_CACHE_STAT_INC(in_slow_tot); 2147 RT_CACHE_STAT_INC(in_slow_tot);
2145 2148
@@ -2148,8 +2151,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2148 2151
2149 if (res.type == RTN_LOCAL) { 2152 if (res.type == RTN_LOCAL) {
2150 err = fib_validate_source(saddr, daddr, tos, 2153 err = fib_validate_source(saddr, daddr, tos,
2151 net->loopback_dev->ifindex, 2154 net->loopback_dev->ifindex,
2152 dev, &spec_dst, &itag, skb->mark); 2155 dev, &spec_dst, &itag, skb->mark);
2153 if (err < 0) 2156 if (err < 0)
2154 goto martian_source_keep_err; 2157 goto martian_source_keep_err;
2155 if (err) 2158 if (err)
@@ -2164,9 +2167,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2164 goto martian_destination; 2167 goto martian_destination;
2165 2168
2166 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2169 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
2167done:
2168 if (free_res)
2169 fib_res_put(&res);
2170out: return err; 2170out: return err;
2171 2171
2172brd_input: 2172brd_input:
@@ -2226,7 +2226,7 @@ local_input:
2226 rth->rt_type = res.type; 2226 rth->rt_type = res.type;
2227 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2227 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
2228 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); 2228 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
2229 goto done; 2229 goto out;
2230 2230
2231no_route: 2231no_route:
2232 RT_CACHE_STAT_INC(in_no_route); 2232 RT_CACHE_STAT_INC(in_no_route);
@@ -2249,21 +2249,21 @@ martian_destination:
2249 2249
2250e_hostunreach: 2250e_hostunreach:
2251 err = -EHOSTUNREACH; 2251 err = -EHOSTUNREACH;
2252 goto done; 2252 goto out;
2253 2253
2254e_inval: 2254e_inval:
2255 err = -EINVAL; 2255 err = -EINVAL;
2256 goto done; 2256 goto out;
2257 2257
2258e_nobufs: 2258e_nobufs:
2259 err = -ENOBUFS; 2259 err = -ENOBUFS;
2260 goto done; 2260 goto out;
2261 2261
2262martian_source: 2262martian_source:
2263 err = -EINVAL; 2263 err = -EINVAL;
2264martian_source_keep_err: 2264martian_source_keep_err:
2265 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2265 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2266 goto done; 2266 goto out;
2267} 2267}
2268 2268
2269int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2269int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2349,6 +2349,7 @@ skip_cache:
2349} 2349}
2350EXPORT_SYMBOL(ip_route_input_common); 2350EXPORT_SYMBOL(ip_route_input_common);
2351 2351
2352/* called with rcu_read_lock() */
2352static int __mkroute_output(struct rtable **result, 2353static int __mkroute_output(struct rtable **result,
2353 struct fib_result *res, 2354 struct fib_result *res,
2354 const struct flowi *fl, 2355 const struct flowi *fl,
@@ -2373,18 +2374,13 @@ static int __mkroute_output(struct rtable **result,
2373 if (dev_out->flags & IFF_LOOPBACK) 2374 if (dev_out->flags & IFF_LOOPBACK)
2374 flags |= RTCF_LOCAL; 2375 flags |= RTCF_LOCAL;
2375 2376
2376 rcu_read_lock();
2377 in_dev = __in_dev_get_rcu(dev_out); 2377 in_dev = __in_dev_get_rcu(dev_out);
2378 if (!in_dev) { 2378 if (!in_dev)
2379 rcu_read_unlock();
2380 return -EINVAL; 2379 return -EINVAL;
2381 } 2380
2382 if (res->type == RTN_BROADCAST) { 2381 if (res->type == RTN_BROADCAST) {
2383 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2382 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2384 if (res->fi) { 2383 res->fi = NULL;
2385 fib_info_put(res->fi);
2386 res->fi = NULL;
2387 }
2388 } else if (res->type == RTN_MULTICAST) { 2384 } else if (res->type == RTN_MULTICAST) {
2389 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2385 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2390 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2386 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
@@ -2394,10 +2390,8 @@ static int __mkroute_output(struct rtable **result,
2394 * default one, but do not gateway in this case. 2390 * default one, but do not gateway in this case.
2395 * Yes, it is hack. 2391 * Yes, it is hack.
2396 */ 2392 */
2397 if (res->fi && res->prefixlen < 4) { 2393 if (res->fi && res->prefixlen < 4)
2398 fib_info_put(res->fi);
2399 res->fi = NULL; 2394 res->fi = NULL;
2400 }
2401 } 2395 }
2402 2396
2403 2397
@@ -2467,6 +2461,7 @@ static int __mkroute_output(struct rtable **result,
2467 return 0; 2461 return 0;
2468} 2462}
2469 2463
2464/* called with rcu_read_lock() */
2470static int ip_mkroute_output(struct rtable **rp, 2465static int ip_mkroute_output(struct rtable **rp,
2471 struct fib_result *res, 2466 struct fib_result *res,
2472 const struct flowi *fl, 2467 const struct flowi *fl,
@@ -2509,7 +2504,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2509 struct fib_result res; 2504 struct fib_result res;
2510 unsigned int flags = 0; 2505 unsigned int flags = 0;
2511 struct net_device *dev_out = NULL; 2506 struct net_device *dev_out = NULL;
2512 int free_res = 0;
2513 int err; 2507 int err;
2514 2508
2515 2509
@@ -2636,15 +2630,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2636 err = -ENETUNREACH; 2630 err = -ENETUNREACH;
2637 goto out; 2631 goto out;
2638 } 2632 }
2639 free_res = 1;
2640 2633
2641 if (res.type == RTN_LOCAL) { 2634 if (res.type == RTN_LOCAL) {
2642 if (!fl.fl4_src) 2635 if (!fl.fl4_src)
2643 fl.fl4_src = fl.fl4_dst; 2636 fl.fl4_src = fl.fl4_dst;
2644 dev_out = net->loopback_dev; 2637 dev_out = net->loopback_dev;
2645 fl.oif = dev_out->ifindex; 2638 fl.oif = dev_out->ifindex;
2646 if (res.fi)
2647 fib_info_put(res.fi);
2648 res.fi = NULL; 2639 res.fi = NULL;
2649 flags |= RTCF_LOCAL; 2640 flags |= RTCF_LOCAL;
2650 goto make_route; 2641 goto make_route;
@@ -2668,8 +2659,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2668make_route: 2659make_route:
2669 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2660 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2670 2661
2671 if (free_res)
2672 fib_res_put(&res);
2673out: return err; 2662out: return err;
2674} 2663}
2675 2664