diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-05 06:41:36 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-05 23:39:38 -0400 |
commit | ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 (patch) | |
tree | 395e50547ffccc6b73e04a44190eb4b4f2d2316b /net/ipv4/fib_frontend.c | |
parent | c2952c314b4fe61820ba8fd6c949eed636140d52 (diff) |
fib: RCU conversion of fib_lookup()
fib_lookup() is converted to be called in an RCU-protected context, so no
reference is taken and released on a contended cache line (fib_clntref).
fib_table_lookup() and fib_semantic_match() get an additional parameter.
struct fib_info gets an rcu_head field and is freed after an RCU grace
period.
Stress test:
(Sending 160,000,000 UDP frames to the same neighbour,
IP route cache disabled, dual E5540 @ 2.53GHz,
32-bit kernel, FIB_HASH; about the same results for FIB_TRIE)
Before patch:
real 1m31.199s
user 0m13.761s
sys 23m24.780s
After patch:
real 1m5.375s
user 0m14.997s
sys 15m50.115s
Before patch profile:
13044.00 15.4% __ip_route_output_key vmlinux
8438.00 10.0% dst_destroy vmlinux
5983.00 7.1% fib_semantic_match vmlinux
5410.00 6.4% fib_rules_lookup vmlinux
4803.00 5.7% neigh_lookup vmlinux
4420.00 5.2% _raw_spin_lock vmlinux
3883.00 4.6% rt_set_nexthop vmlinux
3261.00 3.9% _raw_read_lock vmlinux
2794.00 3.3% fib_table_lookup vmlinux
2374.00 2.8% neigh_resolve_output vmlinux
2153.00 2.5% dst_alloc vmlinux
1502.00 1.8% _raw_read_lock_bh vmlinux
1484.00 1.8% kmem_cache_alloc vmlinux
1407.00 1.7% eth_header vmlinux
1406.00 1.7% ipv4_dst_destroy vmlinux
1298.00 1.5% __copy_from_user_ll vmlinux
1174.00 1.4% dev_queue_xmit vmlinux
1000.00 1.2% ip_output vmlinux
After patch profile:
13712.00 15.8% dst_destroy vmlinux
8548.00 9.9% __ip_route_output_key vmlinux
7017.00 8.1% neigh_lookup vmlinux
4554.00 5.3% fib_semantic_match vmlinux
4067.00 4.7% _raw_read_lock vmlinux
3491.00 4.0% dst_alloc vmlinux
3186.00 3.7% neigh_resolve_output vmlinux
3103.00 3.6% fib_table_lookup vmlinux
2098.00 2.4% _raw_read_lock_bh vmlinux
2081.00 2.4% kmem_cache_alloc vmlinux
2013.00 2.3% _raw_spin_lock vmlinux
1763.00 2.0% __copy_from_user_ll vmlinux
1763.00 2.0% ip_output vmlinux
1761.00 2.0% ipv4_dst_destroy vmlinux
1631.00 1.9% eth_header vmlinux
1440.00 1.7% _raw_read_unlock_bh vmlinux
Reference results, if the IP route cache is enabled:
real 0m29.718s
user 0m10.845s
sys 7m37.341s
25213.00 29.5% __ip_route_output_key vmlinux
9011.00 10.5% dst_release vmlinux
4817.00 5.6% ip_push_pending_frames vmlinux
4232.00 5.0% ip_finish_output vmlinux
3940.00 4.6% udp_sendmsg vmlinux
3730.00 4.4% __copy_from_user_ll vmlinux
3716.00 4.4% ip_route_output_flow vmlinux
2451.00 2.9% __xfrm_lookup vmlinux
2221.00 2.6% ip_append_data vmlinux
1718.00 2.0% _raw_spin_lock_bh vmlinux
1655.00 1.9% __alloc_skb vmlinux
1572.00 1.8% sock_wfree vmlinux
1345.00 1.6% kfree vmlinux
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/fib_frontend.c')
-rw-r--r-- | net/ipv4/fib_frontend.c | 27 |
1 files changed, 13 insertions, 14 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b05c23b05a9f..919f2ad19b49 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -168,8 +168,11 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | |||
168 | struct fib_result res = { 0 }; | 168 | struct fib_result res = { 0 }; |
169 | struct net_device *dev = NULL; | 169 | struct net_device *dev = NULL; |
170 | 170 | ||
171 | if (fib_lookup(net, &fl, &res)) | 171 | rcu_read_lock(); |
172 | if (fib_lookup(net, &fl, &res)) { | ||
173 | rcu_read_unlock(); | ||
172 | return NULL; | 174 | return NULL; |
175 | } | ||
173 | if (res.type != RTN_LOCAL) | 176 | if (res.type != RTN_LOCAL) |
174 | goto out; | 177 | goto out; |
175 | dev = FIB_RES_DEV(res); | 178 | dev = FIB_RES_DEV(res); |
@@ -177,7 +180,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | |||
177 | if (dev && devref) | 180 | if (dev && devref) |
178 | dev_hold(dev); | 181 | dev_hold(dev); |
179 | out: | 182 | out: |
180 | fib_res_put(&res); | 183 | rcu_read_unlock(); |
181 | return dev; | 184 | return dev; |
182 | } | 185 | } |
183 | EXPORT_SYMBOL(__ip_dev_find); | 186 | EXPORT_SYMBOL(__ip_dev_find); |
@@ -207,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
207 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | 210 | local_table = fib_get_table(net, RT_TABLE_LOCAL); |
208 | if (local_table) { | 211 | if (local_table) { |
209 | ret = RTN_UNICAST; | 212 | ret = RTN_UNICAST; |
210 | if (!fib_table_lookup(local_table, &fl, &res)) { | 213 | rcu_read_lock(); |
214 | if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { | ||
211 | if (!dev || dev == res.fi->fib_dev) | 215 | if (!dev || dev == res.fi->fib_dev) |
212 | ret = res.type; | 216 | ret = res.type; |
213 | fib_res_put(&res); | ||
214 | } | 217 | } |
218 | rcu_read_unlock(); | ||
215 | } | 219 | } |
216 | return ret; | 220 | return ret; |
217 | } | 221 | } |
@@ -235,6 +239,7 @@ EXPORT_SYMBOL(inet_dev_addr_type); | |||
235 | * - figure out what "logical" interface this packet arrived | 239 | * - figure out what "logical" interface this packet arrived |
236 | * and calculate "specific destination" address. | 240 | * and calculate "specific destination" address. |
237 | * - check, that packet arrived from expected physical interface. | 241 | * - check, that packet arrived from expected physical interface. |
242 | * called with rcu_read_lock() | ||
238 | */ | 243 | */ |
239 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | 244 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, |
240 | struct net_device *dev, __be32 *spec_dst, | 245 | struct net_device *dev, __be32 *spec_dst, |
@@ -259,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
259 | struct net *net; | 264 | struct net *net; |
260 | 265 | ||
261 | no_addr = rpf = accept_local = 0; | 266 | no_addr = rpf = accept_local = 0; |
262 | rcu_read_lock(); | ||
263 | in_dev = __in_dev_get_rcu(dev); | 267 | in_dev = __in_dev_get_rcu(dev); |
264 | if (in_dev) { | 268 | if (in_dev) { |
265 | no_addr = in_dev->ifa_list == NULL; | 269 | no_addr = in_dev->ifa_list == NULL; |
@@ -268,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
268 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 272 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) |
269 | fl.mark = 0; | 273 | fl.mark = 0; |
270 | } | 274 | } |
271 | rcu_read_unlock(); | ||
272 | 275 | ||
273 | if (in_dev == NULL) | 276 | if (in_dev == NULL) |
274 | goto e_inval; | 277 | goto e_inval; |
@@ -278,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
278 | goto last_resort; | 281 | goto last_resort; |
279 | if (res.type != RTN_UNICAST) { | 282 | if (res.type != RTN_UNICAST) { |
280 | if (res.type != RTN_LOCAL || !accept_local) | 283 | if (res.type != RTN_LOCAL || !accept_local) |
281 | goto e_inval_res; | 284 | goto e_inval; |
282 | } | 285 | } |
283 | *spec_dst = FIB_RES_PREFSRC(res); | 286 | *spec_dst = FIB_RES_PREFSRC(res); |
284 | fib_combine_itag(itag, &res); | 287 | fib_combine_itag(itag, &res); |
@@ -299,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
299 | #endif | 302 | #endif |
300 | if (dev_match) { | 303 | if (dev_match) { |
301 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 304 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
302 | fib_res_put(&res); | ||
303 | return ret; | 305 | return ret; |
304 | } | 306 | } |
305 | fib_res_put(&res); | ||
306 | if (no_addr) | 307 | if (no_addr) |
307 | goto last_resort; | 308 | goto last_resort; |
308 | if (rpf == 1) | 309 | if (rpf == 1) |
@@ -315,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
315 | *spec_dst = FIB_RES_PREFSRC(res); | 316 | *spec_dst = FIB_RES_PREFSRC(res); |
316 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 317 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
317 | } | 318 | } |
318 | fib_res_put(&res); | ||
319 | } | 319 | } |
320 | return ret; | 320 | return ret; |
321 | 321 | ||
@@ -326,8 +326,6 @@ last_resort: | |||
326 | *itag = 0; | 326 | *itag = 0; |
327 | return 0; | 327 | return 0; |
328 | 328 | ||
329 | e_inval_res: | ||
330 | fib_res_put(&res); | ||
331 | e_inval: | 329 | e_inval: |
332 | return -EINVAL; | 330 | return -EINVAL; |
333 | e_rpf: | 331 | e_rpf: |
@@ -873,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) | |||
873 | local_bh_disable(); | 871 | local_bh_disable(); |
874 | 872 | ||
875 | frn->tb_id = tb->tb_id; | 873 | frn->tb_id = tb->tb_id; |
876 | frn->err = fib_table_lookup(tb, &fl, &res); | 874 | rcu_read_lock(); |
875 | frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); | ||
877 | 876 | ||
878 | if (!frn->err) { | 877 | if (!frn->err) { |
879 | frn->prefixlen = res.prefixlen; | 878 | frn->prefixlen = res.prefixlen; |
880 | frn->nh_sel = res.nh_sel; | 879 | frn->nh_sel = res.nh_sel; |
881 | frn->type = res.type; | 880 | frn->type = res.type; |
882 | frn->scope = res.scope; | 881 | frn->scope = res.scope; |
883 | fib_res_put(&res); | ||
884 | } | 882 | } |
883 | rcu_read_unlock(); | ||
885 | local_bh_enable(); | 884 | local_bh_enable(); |
886 | } | 885 | } |
887 | } | 886 | } |