diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-05 06:41:36 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-05 23:39:38 -0400 |
commit | ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 (patch) | |
tree | 395e50547ffccc6b73e04a44190eb4b4f2d2316b /net | |
parent | c2952c314b4fe61820ba8fd6c949eed636140d52 (diff) |
fib: RCU conversion of fib_lookup()
fib_lookup() is converted to be called in an RCU protected context, so no
reference is taken and released on a contended cache line (fib_clntref).
fib_table_lookup() and fib_semantic_match() get an additional parameter
(fib_flags). struct fib_info gets an rcu_head field, and is freed after
an RCU grace period.
Stress test:
(Sending 160.000.000 UDP frames to the same neighbour,
IP route cache disabled, dual E5540 @2.53GHz,
32-bit kernel, FIB_HASH) (about the same results for FIB_TRIE)
Before patch:
real 1m31.199s
user 0m13.761s
sys 23m24.780s
After patch:
real 1m5.375s
user 0m14.997s
sys 15m50.115s
Before patch profile:
13044.00 15.4% __ip_route_output_key vmlinux
8438.00 10.0% dst_destroy vmlinux
5983.00 7.1% fib_semantic_match vmlinux
5410.00 6.4% fib_rules_lookup vmlinux
4803.00 5.7% neigh_lookup vmlinux
4420.00 5.2% _raw_spin_lock vmlinux
3883.00 4.6% rt_set_nexthop vmlinux
3261.00 3.9% _raw_read_lock vmlinux
2794.00 3.3% fib_table_lookup vmlinux
2374.00 2.8% neigh_resolve_output vmlinux
2153.00 2.5% dst_alloc vmlinux
1502.00 1.8% _raw_read_lock_bh vmlinux
1484.00 1.8% kmem_cache_alloc vmlinux
1407.00 1.7% eth_header vmlinux
1406.00 1.7% ipv4_dst_destroy vmlinux
1298.00 1.5% __copy_from_user_ll vmlinux
1174.00 1.4% dev_queue_xmit vmlinux
1000.00 1.2% ip_output vmlinux
After patch profile:
13712.00 15.8% dst_destroy vmlinux
8548.00 9.9% __ip_route_output_key vmlinux
7017.00 8.1% neigh_lookup vmlinux
4554.00 5.3% fib_semantic_match vmlinux
4067.00 4.7% _raw_read_lock vmlinux
3491.00 4.0% dst_alloc vmlinux
3186.00 3.7% neigh_resolve_output vmlinux
3103.00 3.6% fib_table_lookup vmlinux
2098.00 2.4% _raw_read_lock_bh vmlinux
2081.00 2.4% kmem_cache_alloc vmlinux
2013.00 2.3% _raw_spin_lock vmlinux
1763.00 2.0% __copy_from_user_ll vmlinux
1763.00 2.0% ip_output vmlinux
1761.00 2.0% ipv4_dst_destroy vmlinux
1631.00 1.9% eth_header vmlinux
1440.00 1.7% _raw_read_unlock_bh vmlinux
Reference results, if IP route cache is enabled:
real 0m29.718s
user 0m10.845s
sys 7m37.341s
25213.00 29.5% __ip_route_output_key vmlinux
9011.00 10.5% dst_release vmlinux
4817.00 5.6% ip_push_pending_frames vmlinux
4232.00 5.0% ip_finish_output vmlinux
3940.00 4.6% udp_sendmsg vmlinux
3730.00 4.4% __copy_from_user_ll vmlinux
3716.00 4.4% ip_route_output_flow vmlinux
2451.00 2.9% __xfrm_lookup vmlinux
2221.00 2.6% ip_append_data vmlinux
1718.00 2.0% _raw_spin_lock_bh vmlinux
1655.00 1.9% __alloc_skb vmlinux
1572.00 1.8% sock_wfree vmlinux
1345.00 1.6% kfree vmlinux
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/fib_rules.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 27 | ||||
-rw-r--r-- | net/ipv4/fib_hash.c | 5 | ||||
-rw-r--r-- | net/ipv4/fib_lookup.h | 2 | ||||
-rw-r--r-- | net/ipv4/fib_rules.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 21 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 10 | ||||
-rw-r--r-- | net/ipv4/route.c | 59 |
8 files changed, 66 insertions, 64 deletions
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index cfb7d25c172d..21698f8c49ee 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -225,7 +225,8 @@ jumped: | |||
225 | err = ops->action(rule, fl, flags, arg); | 225 | err = ops->action(rule, fl, flags, arg); |
226 | 226 | ||
227 | if (err != -EAGAIN) { | 227 | if (err != -EAGAIN) { |
228 | if (likely(atomic_inc_not_zero(&rule->refcnt))) { | 228 | if ((arg->flags & FIB_LOOKUP_NOREF) || |
229 | likely(atomic_inc_not_zero(&rule->refcnt))) { | ||
229 | arg->rule = rule; | 230 | arg->rule = rule; |
230 | goto out; | 231 | goto out; |
231 | } | 232 | } |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b05c23b05a9f..919f2ad19b49 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -168,8 +168,11 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | |||
168 | struct fib_result res = { 0 }; | 168 | struct fib_result res = { 0 }; |
169 | struct net_device *dev = NULL; | 169 | struct net_device *dev = NULL; |
170 | 170 | ||
171 | if (fib_lookup(net, &fl, &res)) | 171 | rcu_read_lock(); |
172 | if (fib_lookup(net, &fl, &res)) { | ||
173 | rcu_read_unlock(); | ||
172 | return NULL; | 174 | return NULL; |
175 | } | ||
173 | if (res.type != RTN_LOCAL) | 176 | if (res.type != RTN_LOCAL) |
174 | goto out; | 177 | goto out; |
175 | dev = FIB_RES_DEV(res); | 178 | dev = FIB_RES_DEV(res); |
@@ -177,7 +180,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | |||
177 | if (dev && devref) | 180 | if (dev && devref) |
178 | dev_hold(dev); | 181 | dev_hold(dev); |
179 | out: | 182 | out: |
180 | fib_res_put(&res); | 183 | rcu_read_unlock(); |
181 | return dev; | 184 | return dev; |
182 | } | 185 | } |
183 | EXPORT_SYMBOL(__ip_dev_find); | 186 | EXPORT_SYMBOL(__ip_dev_find); |
@@ -207,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
207 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | 210 | local_table = fib_get_table(net, RT_TABLE_LOCAL); |
208 | if (local_table) { | 211 | if (local_table) { |
209 | ret = RTN_UNICAST; | 212 | ret = RTN_UNICAST; |
210 | if (!fib_table_lookup(local_table, &fl, &res)) { | 213 | rcu_read_lock(); |
214 | if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { | ||
211 | if (!dev || dev == res.fi->fib_dev) | 215 | if (!dev || dev == res.fi->fib_dev) |
212 | ret = res.type; | 216 | ret = res.type; |
213 | fib_res_put(&res); | ||
214 | } | 217 | } |
218 | rcu_read_unlock(); | ||
215 | } | 219 | } |
216 | return ret; | 220 | return ret; |
217 | } | 221 | } |
@@ -235,6 +239,7 @@ EXPORT_SYMBOL(inet_dev_addr_type); | |||
235 | * - figure out what "logical" interface this packet arrived | 239 | * - figure out what "logical" interface this packet arrived |
236 | * and calculate "specific destination" address. | 240 | * and calculate "specific destination" address. |
237 | * - check, that packet arrived from expected physical interface. | 241 | * - check, that packet arrived from expected physical interface. |
242 | * called with rcu_read_lock() | ||
238 | */ | 243 | */ |
239 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | 244 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, |
240 | struct net_device *dev, __be32 *spec_dst, | 245 | struct net_device *dev, __be32 *spec_dst, |
@@ -259,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
259 | struct net *net; | 264 | struct net *net; |
260 | 265 | ||
261 | no_addr = rpf = accept_local = 0; | 266 | no_addr = rpf = accept_local = 0; |
262 | rcu_read_lock(); | ||
263 | in_dev = __in_dev_get_rcu(dev); | 267 | in_dev = __in_dev_get_rcu(dev); |
264 | if (in_dev) { | 268 | if (in_dev) { |
265 | no_addr = in_dev->ifa_list == NULL; | 269 | no_addr = in_dev->ifa_list == NULL; |
@@ -268,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
268 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 272 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) |
269 | fl.mark = 0; | 273 | fl.mark = 0; |
270 | } | 274 | } |
271 | rcu_read_unlock(); | ||
272 | 275 | ||
273 | if (in_dev == NULL) | 276 | if (in_dev == NULL) |
274 | goto e_inval; | 277 | goto e_inval; |
@@ -278,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
278 | goto last_resort; | 281 | goto last_resort; |
279 | if (res.type != RTN_UNICAST) { | 282 | if (res.type != RTN_UNICAST) { |
280 | if (res.type != RTN_LOCAL || !accept_local) | 283 | if (res.type != RTN_LOCAL || !accept_local) |
281 | goto e_inval_res; | 284 | goto e_inval; |
282 | } | 285 | } |
283 | *spec_dst = FIB_RES_PREFSRC(res); | 286 | *spec_dst = FIB_RES_PREFSRC(res); |
284 | fib_combine_itag(itag, &res); | 287 | fib_combine_itag(itag, &res); |
@@ -299,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
299 | #endif | 302 | #endif |
300 | if (dev_match) { | 303 | if (dev_match) { |
301 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 304 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
302 | fib_res_put(&res); | ||
303 | return ret; | 305 | return ret; |
304 | } | 306 | } |
305 | fib_res_put(&res); | ||
306 | if (no_addr) | 307 | if (no_addr) |
307 | goto last_resort; | 308 | goto last_resort; |
308 | if (rpf == 1) | 309 | if (rpf == 1) |
@@ -315,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
315 | *spec_dst = FIB_RES_PREFSRC(res); | 316 | *spec_dst = FIB_RES_PREFSRC(res); |
316 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 317 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
317 | } | 318 | } |
318 | fib_res_put(&res); | ||
319 | } | 319 | } |
320 | return ret; | 320 | return ret; |
321 | 321 | ||
@@ -326,8 +326,6 @@ last_resort: | |||
326 | *itag = 0; | 326 | *itag = 0; |
327 | return 0; | 327 | return 0; |
328 | 328 | ||
329 | e_inval_res: | ||
330 | fib_res_put(&res); | ||
331 | e_inval: | 329 | e_inval: |
332 | return -EINVAL; | 330 | return -EINVAL; |
333 | e_rpf: | 331 | e_rpf: |
@@ -873,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) | |||
873 | local_bh_disable(); | 871 | local_bh_disable(); |
874 | 872 | ||
875 | frn->tb_id = tb->tb_id; | 873 | frn->tb_id = tb->tb_id; |
876 | frn->err = fib_table_lookup(tb, &fl, &res); | 874 | rcu_read_lock(); |
875 | frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); | ||
877 | 876 | ||
878 | if (!frn->err) { | 877 | if (!frn->err) { |
879 | frn->prefixlen = res.prefixlen; | 878 | frn->prefixlen = res.prefixlen; |
880 | frn->nh_sel = res.nh_sel; | 879 | frn->nh_sel = res.nh_sel; |
881 | frn->type = res.type; | 880 | frn->type = res.type; |
882 | frn->scope = res.scope; | 881 | frn->scope = res.scope; |
883 | fib_res_put(&res); | ||
884 | } | 882 | } |
883 | rcu_read_unlock(); | ||
885 | local_bh_enable(); | 884 | local_bh_enable(); |
886 | } | 885 | } |
887 | } | 886 | } |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 4ed7e0dea1bc..83cca68e259c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -244,7 +244,8 @@ fn_new_zone(struct fn_hash *table, int z) | |||
244 | } | 244 | } |
245 | 245 | ||
246 | int fib_table_lookup(struct fib_table *tb, | 246 | int fib_table_lookup(struct fib_table *tb, |
247 | const struct flowi *flp, struct fib_result *res) | 247 | const struct flowi *flp, struct fib_result *res, |
248 | int fib_flags) | ||
248 | { | 249 | { |
249 | int err; | 250 | int err; |
250 | struct fn_zone *fz; | 251 | struct fn_zone *fz; |
@@ -264,7 +265,7 @@ int fib_table_lookup(struct fib_table *tb, | |||
264 | 265 | ||
265 | err = fib_semantic_match(&f->fn_alias, | 266 | err = fib_semantic_match(&f->fn_alias, |
266 | flp, res, | 267 | flp, res, |
267 | fz->fz_order); | 268 | fz->fz_order, fib_flags); |
268 | if (err <= 0) | 269 | if (err <= 0) |
269 | goto out; | 270 | goto out; |
270 | } | 271 | } |
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 637b133973bd..b9c9a9f2aee5 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h | |||
@@ -22,7 +22,7 @@ struct fib_alias { | |||
22 | /* Exported by fib_semantics.c */ | 22 | /* Exported by fib_semantics.c */ |
23 | extern int fib_semantic_match(struct list_head *head, | 23 | extern int fib_semantic_match(struct list_head *head, |
24 | const struct flowi *flp, | 24 | const struct flowi *flp, |
25 | struct fib_result *res, int prefixlen); | 25 | struct fib_result *res, int prefixlen, int fib_flags); |
26 | extern void fib_release_info(struct fib_info *); | 26 | extern void fib_release_info(struct fib_info *); |
27 | extern struct fib_info *fib_create_info(struct fib_config *cfg); | 27 | extern struct fib_info *fib_create_info(struct fib_config *cfg); |
28 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); | 28 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 32300521e32c..7981a24f5c7b 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -57,6 +57,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | |||
57 | { | 57 | { |
58 | struct fib_lookup_arg arg = { | 58 | struct fib_lookup_arg arg = { |
59 | .result = res, | 59 | .result = res, |
60 | .flags = FIB_LOOKUP_NOREF, | ||
60 | }; | 61 | }; |
61 | int err; | 62 | int err; |
62 | 63 | ||
@@ -94,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, | |||
94 | if (!tbl) | 95 | if (!tbl) |
95 | goto errout; | 96 | goto errout; |
96 | 97 | ||
97 | err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); | 98 | err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); |
98 | if (err > 0) | 99 | if (err > 0) |
99 | err = -EAGAIN; | 100 | err = -EAGAIN; |
100 | errout: | 101 | errout: |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ba52f399a898..0f80dfc2f7fb 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -148,6 +148,13 @@ static const struct | |||
148 | 148 | ||
149 | /* Release a nexthop info record */ | 149 | /* Release a nexthop info record */ |
150 | 150 | ||
151 | static void free_fib_info_rcu(struct rcu_head *head) | ||
152 | { | ||
153 | struct fib_info *fi = container_of(head, struct fib_info, rcu); | ||
154 | |||
155 | kfree(fi); | ||
156 | } | ||
157 | |||
151 | void free_fib_info(struct fib_info *fi) | 158 | void free_fib_info(struct fib_info *fi) |
152 | { | 159 | { |
153 | if (fi->fib_dead == 0) { | 160 | if (fi->fib_dead == 0) { |
@@ -161,7 +168,7 @@ void free_fib_info(struct fib_info *fi) | |||
161 | } endfor_nexthops(fi); | 168 | } endfor_nexthops(fi); |
162 | fib_info_cnt--; | 169 | fib_info_cnt--; |
163 | release_net(fi->fib_net); | 170 | release_net(fi->fib_net); |
164 | kfree(fi); | 171 | call_rcu(&fi->rcu, free_fib_info_rcu); |
165 | } | 172 | } |
166 | 173 | ||
167 | void fib_release_info(struct fib_info *fi) | 174 | void fib_release_info(struct fib_info *fi) |
@@ -553,6 +560,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
553 | nh->nh_scope = RT_SCOPE_LINK; | 560 | nh->nh_scope = RT_SCOPE_LINK; |
554 | return 0; | 561 | return 0; |
555 | } | 562 | } |
563 | rcu_read_lock(); | ||
556 | { | 564 | { |
557 | struct flowi fl = { | 565 | struct flowi fl = { |
558 | .nl_u = { | 566 | .nl_u = { |
@@ -568,8 +576,10 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
568 | if (fl.fl4_scope < RT_SCOPE_LINK) | 576 | if (fl.fl4_scope < RT_SCOPE_LINK) |
569 | fl.fl4_scope = RT_SCOPE_LINK; | 577 | fl.fl4_scope = RT_SCOPE_LINK; |
570 | err = fib_lookup(net, &fl, &res); | 578 | err = fib_lookup(net, &fl, &res); |
571 | if (err) | 579 | if (err) { |
580 | rcu_read_unlock(); | ||
572 | return err; | 581 | return err; |
582 | } | ||
573 | } | 583 | } |
574 | err = -EINVAL; | 584 | err = -EINVAL; |
575 | if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | 585 | if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) |
@@ -585,7 +595,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
585 | goto out; | 595 | goto out; |
586 | err = 0; | 596 | err = 0; |
587 | out: | 597 | out: |
588 | fib_res_put(&res); | 598 | rcu_read_unlock(); |
589 | return err; | 599 | return err; |
590 | } else { | 600 | } else { |
591 | struct in_device *in_dev; | 601 | struct in_device *in_dev; |
@@ -879,7 +889,7 @@ failure: | |||
879 | 889 | ||
880 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ | 890 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ |
881 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, | 891 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, |
882 | struct fib_result *res, int prefixlen) | 892 | struct fib_result *res, int prefixlen, int fib_flags) |
883 | { | 893 | { |
884 | struct fib_alias *fa; | 894 | struct fib_alias *fa; |
885 | int nh_sel = 0; | 895 | int nh_sel = 0; |
@@ -943,7 +953,8 @@ out_fill_res: | |||
943 | res->type = fa->fa_type; | 953 | res->type = fa->fa_type; |
944 | res->scope = fa->fa_scope; | 954 | res->scope = fa->fa_scope; |
945 | res->fi = fa->fa_info; | 955 | res->fi = fa->fa_info; |
946 | atomic_inc(&res->fi->fib_clntref); | 956 | if (!(fib_flags & FIB_LOOKUP_NOREF)) |
957 | atomic_inc(&res->fi->fib_clntref); | ||
947 | return 0; | 958 | return 0; |
948 | } | 959 | } |
949 | 960 | ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a96e5ec211a0..271c89bdf049 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1342,7 +1342,7 @@ err: | |||
1342 | /* should be called with rcu_read_lock */ | 1342 | /* should be called with rcu_read_lock */ |
1343 | static int check_leaf(struct trie *t, struct leaf *l, | 1343 | static int check_leaf(struct trie *t, struct leaf *l, |
1344 | t_key key, const struct flowi *flp, | 1344 | t_key key, const struct flowi *flp, |
1345 | struct fib_result *res) | 1345 | struct fib_result *res, int fib_flags) |
1346 | { | 1346 | { |
1347 | struct leaf_info *li; | 1347 | struct leaf_info *li; |
1348 | struct hlist_head *hhead = &l->list; | 1348 | struct hlist_head *hhead = &l->list; |
@@ -1356,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
1356 | if (l->key != (key & ntohl(mask))) | 1356 | if (l->key != (key & ntohl(mask))) |
1357 | continue; | 1357 | continue; |
1358 | 1358 | ||
1359 | err = fib_semantic_match(&li->falh, flp, res, plen); | 1359 | err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); |
1360 | 1360 | ||
1361 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1361 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1362 | if (err <= 0) | 1362 | if (err <= 0) |
@@ -1372,7 +1372,7 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
1372 | } | 1372 | } |
1373 | 1373 | ||
1374 | int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | 1374 | int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, |
1375 | struct fib_result *res) | 1375 | struct fib_result *res, int fib_flags) |
1376 | { | 1376 | { |
1377 | struct trie *t = (struct trie *) tb->tb_data; | 1377 | struct trie *t = (struct trie *) tb->tb_data; |
1378 | int ret; | 1378 | int ret; |
@@ -1399,7 +1399,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1399 | 1399 | ||
1400 | /* Just a leaf? */ | 1400 | /* Just a leaf? */ |
1401 | if (IS_LEAF(n)) { | 1401 | if (IS_LEAF(n)) { |
1402 | ret = check_leaf(t, (struct leaf *)n, key, flp, res); | 1402 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); |
1403 | goto found; | 1403 | goto found; |
1404 | } | 1404 | } |
1405 | 1405 | ||
@@ -1424,7 +1424,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1424 | } | 1424 | } |
1425 | 1425 | ||
1426 | if (IS_LEAF(n)) { | 1426 | if (IS_LEAF(n)) { |
1427 | ret = check_leaf(t, (struct leaf *)n, key, flp, res); | 1427 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); |
1428 | if (ret > 0) | 1428 | if (ret > 0) |
1429 | goto backtrace; | 1429 | goto backtrace; |
1430 | goto found; | 1430 | goto found; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 04e0df82b88c..7864d0c48968 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1773,12 +1773,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1773 | 1773 | ||
1774 | if (rt->fl.iif == 0) | 1774 | if (rt->fl.iif == 0) |
1775 | src = rt->rt_src; | 1775 | src = rt->rt_src; |
1776 | else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { | 1776 | else { |
1777 | src = FIB_RES_PREFSRC(res); | 1777 | rcu_read_lock(); |
1778 | fib_res_put(&res); | 1778 | if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) |
1779 | } else | 1779 | src = FIB_RES_PREFSRC(res); |
1780 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | 1780 | else |
1781 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | ||
1781 | RT_SCOPE_UNIVERSE); | 1782 | RT_SCOPE_UNIVERSE); |
1783 | rcu_read_unlock(); | ||
1784 | } | ||
1782 | memcpy(addr, &src, 4); | 1785 | memcpy(addr, &src, 4); |
1783 | } | 1786 | } |
1784 | 1787 | ||
@@ -2081,6 +2084,7 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2081 | * Such approach solves two big problems: | 2084 | * Such approach solves two big problems: |
2082 | * 1. Not simplex devices are handled properly. | 2085 | * 1. Not simplex devices are handled properly. |
2083 | * 2. IP spoofing attempts are filtered with 100% of guarantee. | 2086 | * 2. IP spoofing attempts are filtered with 100% of guarantee. |
2087 | * called with rcu_read_lock() | ||
2084 | */ | 2088 | */ |
2085 | 2089 | ||
2086 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2090 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
@@ -2102,7 +2106,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2102 | unsigned hash; | 2106 | unsigned hash; |
2103 | __be32 spec_dst; | 2107 | __be32 spec_dst; |
2104 | int err = -EINVAL; | 2108 | int err = -EINVAL; |
2105 | int free_res = 0; | ||
2106 | struct net * net = dev_net(dev); | 2109 | struct net * net = dev_net(dev); |
2107 | 2110 | ||
2108 | /* IP on this device is disabled. */ | 2111 | /* IP on this device is disabled. */ |
@@ -2134,12 +2137,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2134 | /* | 2137 | /* |
2135 | * Now we are ready to route packet. | 2138 | * Now we are ready to route packet. |
2136 | */ | 2139 | */ |
2137 | if ((err = fib_lookup(net, &fl, &res)) != 0) { | 2140 | err = fib_lookup(net, &fl, &res); |
2141 | if (err != 0) { | ||
2138 | if (!IN_DEV_FORWARD(in_dev)) | 2142 | if (!IN_DEV_FORWARD(in_dev)) |
2139 | goto e_hostunreach; | 2143 | goto e_hostunreach; |
2140 | goto no_route; | 2144 | goto no_route; |
2141 | } | 2145 | } |
2142 | free_res = 1; | ||
2143 | 2146 | ||
2144 | RT_CACHE_STAT_INC(in_slow_tot); | 2147 | RT_CACHE_STAT_INC(in_slow_tot); |
2145 | 2148 | ||
@@ -2148,8 +2151,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2148 | 2151 | ||
2149 | if (res.type == RTN_LOCAL) { | 2152 | if (res.type == RTN_LOCAL) { |
2150 | err = fib_validate_source(saddr, daddr, tos, | 2153 | err = fib_validate_source(saddr, daddr, tos, |
2151 | net->loopback_dev->ifindex, | 2154 | net->loopback_dev->ifindex, |
2152 | dev, &spec_dst, &itag, skb->mark); | 2155 | dev, &spec_dst, &itag, skb->mark); |
2153 | if (err < 0) | 2156 | if (err < 0) |
2154 | goto martian_source_keep_err; | 2157 | goto martian_source_keep_err; |
2155 | if (err) | 2158 | if (err) |
@@ -2164,9 +2167,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2164 | goto martian_destination; | 2167 | goto martian_destination; |
2165 | 2168 | ||
2166 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2169 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); |
2167 | done: | ||
2168 | if (free_res) | ||
2169 | fib_res_put(&res); | ||
2170 | out: return err; | 2170 | out: return err; |
2171 | 2171 | ||
2172 | brd_input: | 2172 | brd_input: |
@@ -2226,7 +2226,7 @@ local_input: | |||
2226 | rth->rt_type = res.type; | 2226 | rth->rt_type = res.type; |
2227 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2227 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); |
2228 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); | 2228 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); |
2229 | goto done; | 2229 | goto out; |
2230 | 2230 | ||
2231 | no_route: | 2231 | no_route: |
2232 | RT_CACHE_STAT_INC(in_no_route); | 2232 | RT_CACHE_STAT_INC(in_no_route); |
@@ -2249,21 +2249,21 @@ martian_destination: | |||
2249 | 2249 | ||
2250 | e_hostunreach: | 2250 | e_hostunreach: |
2251 | err = -EHOSTUNREACH; | 2251 | err = -EHOSTUNREACH; |
2252 | goto done; | 2252 | goto out; |
2253 | 2253 | ||
2254 | e_inval: | 2254 | e_inval: |
2255 | err = -EINVAL; | 2255 | err = -EINVAL; |
2256 | goto done; | 2256 | goto out; |
2257 | 2257 | ||
2258 | e_nobufs: | 2258 | e_nobufs: |
2259 | err = -ENOBUFS; | 2259 | err = -ENOBUFS; |
2260 | goto done; | 2260 | goto out; |
2261 | 2261 | ||
2262 | martian_source: | 2262 | martian_source: |
2263 | err = -EINVAL; | 2263 | err = -EINVAL; |
2264 | martian_source_keep_err: | 2264 | martian_source_keep_err: |
2265 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 2265 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
2266 | goto done; | 2266 | goto out; |
2267 | } | 2267 | } |
2268 | 2268 | ||
2269 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2269 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
@@ -2349,6 +2349,7 @@ skip_cache: | |||
2349 | } | 2349 | } |
2350 | EXPORT_SYMBOL(ip_route_input_common); | 2350 | EXPORT_SYMBOL(ip_route_input_common); |
2351 | 2351 | ||
2352 | /* called with rcu_read_lock() */ | ||
2352 | static int __mkroute_output(struct rtable **result, | 2353 | static int __mkroute_output(struct rtable **result, |
2353 | struct fib_result *res, | 2354 | struct fib_result *res, |
2354 | const struct flowi *fl, | 2355 | const struct flowi *fl, |
@@ -2373,18 +2374,13 @@ static int __mkroute_output(struct rtable **result, | |||
2373 | if (dev_out->flags & IFF_LOOPBACK) | 2374 | if (dev_out->flags & IFF_LOOPBACK) |
2374 | flags |= RTCF_LOCAL; | 2375 | flags |= RTCF_LOCAL; |
2375 | 2376 | ||
2376 | rcu_read_lock(); | ||
2377 | in_dev = __in_dev_get_rcu(dev_out); | 2377 | in_dev = __in_dev_get_rcu(dev_out); |
2378 | if (!in_dev) { | 2378 | if (!in_dev) |
2379 | rcu_read_unlock(); | ||
2380 | return -EINVAL; | 2379 | return -EINVAL; |
2381 | } | 2380 | |
2382 | if (res->type == RTN_BROADCAST) { | 2381 | if (res->type == RTN_BROADCAST) { |
2383 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2382 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
2384 | if (res->fi) { | 2383 | res->fi = NULL; |
2385 | fib_info_put(res->fi); | ||
2386 | res->fi = NULL; | ||
2387 | } | ||
2388 | } else if (res->type == RTN_MULTICAST) { | 2384 | } else if (res->type == RTN_MULTICAST) { |
2389 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 2385 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
2390 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, | 2386 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, |
@@ -2394,10 +2390,8 @@ static int __mkroute_output(struct rtable **result, | |||
2394 | * default one, but do not gateway in this case. | 2390 | * default one, but do not gateway in this case. |
2395 | * Yes, it is hack. | 2391 | * Yes, it is hack. |
2396 | */ | 2392 | */ |
2397 | if (res->fi && res->prefixlen < 4) { | 2393 | if (res->fi && res->prefixlen < 4) |
2398 | fib_info_put(res->fi); | ||
2399 | res->fi = NULL; | 2394 | res->fi = NULL; |
2400 | } | ||
2401 | } | 2395 | } |
2402 | 2396 | ||
2403 | 2397 | ||
@@ -2467,6 +2461,7 @@ static int __mkroute_output(struct rtable **result, | |||
2467 | return 0; | 2461 | return 0; |
2468 | } | 2462 | } |
2469 | 2463 | ||
2464 | /* called with rcu_read_lock() */ | ||
2470 | static int ip_mkroute_output(struct rtable **rp, | 2465 | static int ip_mkroute_output(struct rtable **rp, |
2471 | struct fib_result *res, | 2466 | struct fib_result *res, |
2472 | const struct flowi *fl, | 2467 | const struct flowi *fl, |
@@ -2509,7 +2504,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2509 | struct fib_result res; | 2504 | struct fib_result res; |
2510 | unsigned int flags = 0; | 2505 | unsigned int flags = 0; |
2511 | struct net_device *dev_out = NULL; | 2506 | struct net_device *dev_out = NULL; |
2512 | int free_res = 0; | ||
2513 | int err; | 2507 | int err; |
2514 | 2508 | ||
2515 | 2509 | ||
@@ -2636,15 +2630,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2636 | err = -ENETUNREACH; | 2630 | err = -ENETUNREACH; |
2637 | goto out; | 2631 | goto out; |
2638 | } | 2632 | } |
2639 | free_res = 1; | ||
2640 | 2633 | ||
2641 | if (res.type == RTN_LOCAL) { | 2634 | if (res.type == RTN_LOCAL) { |
2642 | if (!fl.fl4_src) | 2635 | if (!fl.fl4_src) |
2643 | fl.fl4_src = fl.fl4_dst; | 2636 | fl.fl4_src = fl.fl4_dst; |
2644 | dev_out = net->loopback_dev; | 2637 | dev_out = net->loopback_dev; |
2645 | fl.oif = dev_out->ifindex; | 2638 | fl.oif = dev_out->ifindex; |
2646 | if (res.fi) | ||
2647 | fib_info_put(res.fi); | ||
2648 | res.fi = NULL; | 2639 | res.fi = NULL; |
2649 | flags |= RTCF_LOCAL; | 2640 | flags |= RTCF_LOCAL; |
2650 | goto make_route; | 2641 | goto make_route; |
@@ -2668,8 +2659,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2668 | make_route: | 2659 | make_route: |
2669 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2660 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); |
2670 | 2661 | ||
2671 | if (free_res) | ||
2672 | fib_res_put(&res); | ||
2673 | out: return err; | 2662 | out: return err; |
2674 | } | 2663 | } |
2675 | 2664 | ||