Diffstat (limited to 'net/ipv4/fib_rules.c')
-rw-r--r--   net/ipv4/fib_rules.c | 140
1 file changed, 92 insertions, 48 deletions
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0dd4d06e456d..ec566f3e66c7 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -40,6 +40,8 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -52,7 +54,7 @@
 
 struct fib_rule
 {
-        struct fib_rule *r_next;
+        struct hlist_node hlist;
         atomic_t        r_clntref;
         u32             r_preference;
         unsigned char   r_table;
@@ -75,6 +77,7 @@ struct fib_rule
 #endif
         char            r_ifname[IFNAMSIZ];
         int             r_dead;
+        struct rcu_head rcu;
 };
 
 static struct fib_rule default_rule = {
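The two struct hunks above are the heart of the conversion: the hand-rolled singly linked list pointer (r_next) is replaced by an embedded struct hlist_node, and a struct rcu_head is added so a rule removed from the list can be freed only after every RCU reader that might still be traversing it has finished. A minimal sketch of that pattern (my_rule, my_rule_put() and the other names here are illustrative, not from the patch):

#include <linux/types.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <asm/atomic.h>         /* atomic_t; <linux/atomic.h> on newer kernels */

struct my_rule {
        struct hlist_node hlist;        /* linkage into the rules hlist */
        atomic_t          refcnt;       /* lookups pin the rule with this */
        u32               preference;   /* keeps the list sorted */
        int               dead;         /* set once a writer unlinks it */
        struct rcu_head   rcu;          /* lets kfree() be deferred by RCU */
};

/* call_rcu() callback: runs after a grace period, when no reader can
 * still hold a pointer obtained from the list */
static void my_rule_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct my_rule, rcu));
}

/* drop a reference; the last put on an unlinked rule schedules the free */
static void my_rule_put(struct my_rule *r)
{
        if (atomic_dec_and_test(&r->refcnt) && r->dead)
                call_rcu(&r->rcu, my_rule_free_rcu);
}

The patch's fib_rule_put() differs only in that it warns when the last reference is dropped on a rule that is still linked, rather than silently keeping it around.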
@@ -85,7 +88,6 @@ static struct fib_rule default_rule = {
 };
 
 static struct fib_rule main_rule = {
-        .r_next = &default_rule,
         .r_clntref = ATOMIC_INIT(2),
         .r_preference = 0x7FFE,
         .r_table = RT_TABLE_MAIN,
@@ -93,23 +95,26 @@ static struct fib_rule main_rule = {
 };
 
 static struct fib_rule local_rule = {
-        .r_next = &main_rule,
         .r_clntref = ATOMIC_INIT(2),
         .r_table = RT_TABLE_LOCAL,
         .r_action = RTN_UNICAST,
 };
 
-static struct fib_rule *fib_rules = &local_rule;
-static DEFINE_RWLOCK(fib_rules_lock);
+static struct hlist_head fib_rules;
+
+/* writer func called from netlink -- rtnl_sem hold*/
+
+static void rtmsg_rule(int, struct fib_rule *);
 
 int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
         struct rtattr **rta = arg;
         struct rtmsg *rtm = NLMSG_DATA(nlh);
-        struct fib_rule *r, **rp;
+        struct fib_rule *r;
+        struct hlist_node *node;
         int err = -ESRCH;
 
-        for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) {
+        hlist_for_each_entry(r, node, &fib_rules, hlist) {
                 if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
                     rtm->rtm_src_len == r->r_src_len &&
                     rtm->rtm_dst_len == r->r_dst_len &&
@@ -126,10 +131,9 @@ int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
                         if (r == &local_rule)
                                 break;
 
-                        write_lock_bh(&fib_rules_lock);
-                        *rp = r->r_next;
+                        hlist_del_rcu(&r->hlist);
                         r->r_dead = 1;
-                        write_unlock_bh(&fib_rules_lock);
+                        rtmsg_rule(RTM_DELRULE, r);
                         fib_rule_put(r);
                         err = 0;
                         break;
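Deletion in inet_rtm_delrule() now relies on two things: the rtnl semaphore, which serializes all writers and so replaces the write side of the old rwlock, and hlist_del_rcu(), which unlinks the node while leaving its forward pointer intact so readers already positioned on the rule can keep walking. Building on the sketch above (again with illustrative names), the writer-side removal boils down to:

/* Assumes the caller holds the rtnl semaphore, so no other writer can
 * touch the list concurrently; readers are excluded only by RCU. */
static void my_rule_unlink(struct my_rule *r)
{
        hlist_del_rcu(&r->hlist);       /* unlink; r->hlist.next stays valid
                                         * for readers still inside r */
        r->dead = 1;                    /* the final put may now free it */
        my_rule_put(r);                 /* drop the list's own reference */
}

The memory itself is reclaimed only in my_rule_free_rcu() after a grace period, which is why the patch can drop write_lock_bh()/write_unlock_bh() without risking a use-after-free in fib_lookup().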
@@ -150,21 +154,30 @@ static struct fib_table *fib_empty_table(void)
         return NULL;
 }
 
+static inline void fib_rule_put_rcu(struct rcu_head *head)
+{
+        struct fib_rule *r = container_of(head, struct fib_rule, rcu);
+        kfree(r);
+}
+
 void fib_rule_put(struct fib_rule *r)
 {
         if (atomic_dec_and_test(&r->r_clntref)) {
                 if (r->r_dead)
-                        kfree(r);
+                        call_rcu(&r->rcu, fib_rule_put_rcu);
                 else
                         printk("Freeing alive rule %p\n", r);
         }
 }
 
+/* writer func called from netlink -- rtnl_sem hold*/
+
 int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
         struct rtattr **rta = arg;
         struct rtmsg *rtm = NLMSG_DATA(nlh);
-        struct fib_rule *r, *new_r, **rp;
+        struct fib_rule *r, *new_r, *last = NULL;
+        struct hlist_node *node = NULL;
         unsigned char table_id;
 
         if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
@@ -188,6 +201,7 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
         if (!new_r)
                 return -ENOMEM;
         memset(new_r, 0, sizeof(*new_r));
+
         if (rta[RTA_SRC-1])
                 memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
         if (rta[RTA_DST-1])
@@ -220,28 +234,29 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
         if (rta[RTA_FLOW-1])
                 memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
 #endif
+        r = container_of(fib_rules.first, struct fib_rule, hlist);
 
-        rp = &fib_rules;
         if (!new_r->r_preference) {
-                r = fib_rules;
-                if (r && (r = r->r_next) != NULL) {
-                        rp = &fib_rules->r_next;
+                if (r && r->hlist.next != NULL) {
+                        r = container_of(r->hlist.next, struct fib_rule, hlist);
                         if (r->r_preference)
                                 new_r->r_preference = r->r_preference - 1;
                 }
         }
 
-        while ( (r = *rp) != NULL ) {
+        hlist_for_each_entry(r, node, &fib_rules, hlist) {
                 if (r->r_preference > new_r->r_preference)
                         break;
-                rp = &r->r_next;
+                last = r;
         }
-
-        new_r->r_next = r;
         atomic_inc(&new_r->r_clntref);
-        write_lock_bh(&fib_rules_lock);
-        *rp = new_r;
-        write_unlock_bh(&fib_rules_lock);
+
+        if (last)
+                hlist_add_after_rcu(&last->hlist, &new_r->hlist);
+        else
+                hlist_add_before_rcu(&new_r->hlist, &r->hlist);
+
+        rtmsg_rule(RTM_NEWRULE, new_r);
         return 0;
 }
 
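Insertion keeps the rules sorted by preference. The writer (again under rtnl) remembers the last entry whose preference is not larger than the new one and links the new rule behind it with hlist_add_after_rcu(); if no such entry exists, the new rule goes in front of the first entry with hlist_add_before_rcu(). Both primitives publish the new node with the memory barriers RCU readers need. A sketch of the same logic, using the four-argument hlist_for_each_entry() form this kernel generation uses (later kernels dropped the separate hlist_node cursor, and hlist_add_after_rcu() was eventually renamed hlist_add_behind_rcu()):

/* Assumes rtnl is held and the list is never empty, as in the patch,
 * where the local/main/default rules are always present. */
static void my_rule_insert(struct hlist_head *head, struct my_rule *new_r)
{
        struct my_rule *r = NULL, *last = NULL;
        struct hlist_node *node;

        hlist_for_each_entry(r, node, head, hlist) {
                if (r->preference > new_r->preference)
                        break;          /* new_r belongs before r */
                last = r;               /* new_r belongs after last */
        }

        if (last)
                hlist_add_after_rcu(&last->hlist, &new_r->hlist);
        else
                hlist_add_before_rcu(&new_r->hlist, &r->hlist);
}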
@@ -254,30 +269,30 @@ u32 fib_rules_tclass(struct fib_result *res)
 }
 #endif
 
+/* callers should hold rtnl semaphore */
 
 static void fib_rules_detach(struct net_device *dev)
 {
+        struct hlist_node *node;
         struct fib_rule *r;
 
-        for (r=fib_rules; r; r=r->r_next) {
-                if (r->r_ifindex == dev->ifindex) {
-                        write_lock_bh(&fib_rules_lock);
+        hlist_for_each_entry(r, node, &fib_rules, hlist) {
+                if (r->r_ifindex == dev->ifindex)
                         r->r_ifindex = -1;
-                        write_unlock_bh(&fib_rules_lock);
-                }
+
         }
 }
 
+/* callers should hold rtnl semaphore */
+
 static void fib_rules_attach(struct net_device *dev)
 {
+        struct hlist_node *node;
         struct fib_rule *r;
 
-        for (r=fib_rules; r; r=r->r_next) {
-                if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) {
-                        write_lock_bh(&fib_rules_lock);
+        hlist_for_each_entry(r, node, &fib_rules, hlist) {
+                if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
                         r->r_ifindex = dev->ifindex;
-                        write_unlock_bh(&fib_rules_lock);
-                }
         }
 }
 
@@ -286,14 +301,17 @@ int fib_lookup(const struct flowi *flp, struct fib_result *res)
         int err;
         struct fib_rule *r, *policy;
         struct fib_table *tb;
+        struct hlist_node *node;
 
         u32 daddr = flp->fl4_dst;
         u32 saddr = flp->fl4_src;
 
 FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
         NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
-        read_lock(&fib_rules_lock);
-        for (r = fib_rules; r; r=r->r_next) {
+
+        rcu_read_lock();
+
+        hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
                 if (((saddr^r->r_src) & r->r_srcmask) ||
                     ((daddr^r->r_dst) & r->r_dstmask) ||
                     (r->r_tos && r->r_tos != flp->fl4_tos) ||
@@ -309,14 +327,14 @@ FRprintk("tb %d r %d ", r->r_table, r->r_action);
                         policy = r;
                         break;
                 case RTN_UNREACHABLE:
-                        rcu_read_unlock();
+                        rcu_read_unlock();
                         return -ENETUNREACH;
                 default:
                 case RTN_BLACKHOLE:
-                        read_unlock(&fib_rules_lock);
+                        rcu_read_unlock();
                         return -EINVAL;
                 case RTN_PROHIBIT:
-                        read_unlock(&fib_rules_lock);
+                        rcu_read_unlock();
                         return -EACCES;
                 }
 
@@ -327,16 +345,16 @@ FRprintk("tb %d r %d ", r->r_table, r->r_action);
                         res->r = policy;
                         if (policy)
                                 atomic_inc(&policy->r_clntref);
-                        read_unlock(&fib_rules_lock);
+                        rcu_read_unlock();
                         return 0;
                 }
                 if (err < 0 && err != -EAGAIN) {
-                        read_unlock(&fib_rules_lock);
+                        rcu_read_unlock();
                         return err;
                 }
         }
 FRprintk("FAILURE\n");
-        read_unlock(&fib_rules_lock);
+        rcu_read_unlock();
         return -ENETUNREACH;
 }
 
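On the read side the change is mechanical: read_lock()/read_unlock() become rcu_read_lock()/rcu_read_unlock(), and the traversal uses hlist_for_each_entry_rcu(), whose dereference of each next pointer pairs with the barriers in the _rcu insertion helpers. The one subtlety fib_lookup() already handled is visible in the atomic_inc() on policy->r_clntref: a rule that must be used after the critical section ends has to be pinned by a reference before rcu_read_unlock(). A reader sketch in the same illustrative terms as above, where my_rule_match() merely stands in for the real source/destination/TOS/ifindex checks:

static int my_rule_match(const struct my_rule *r, u32 daddr)
{
        return 1;       /* placeholder for the real matching logic */
}

static struct my_rule *my_rule_lookup(struct hlist_head *head, u32 daddr)
{
        struct my_rule *r, *found = NULL;
        struct hlist_node *node;

        rcu_read_lock();
        hlist_for_each_entry_rcu(r, node, head, hlist) {
                if (my_rule_match(r, daddr)) {
                        atomic_inc(&r->refcnt); /* pin r beyond this section */
                        found = r;
                        break;
                }
        }
        rcu_read_unlock();

        return found;   /* the caller eventually drops it with my_rule_put() */
}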
@@ -368,14 +386,14 @@ static struct notifier_block fib_rules_notifier = {
 
 static __inline__ int inet_fill_rule(struct sk_buff *skb,
                                      struct fib_rule *r,
-                                     struct netlink_callback *cb,
+                                     u32 pid, u32 seq, int event,
                                      unsigned int flags)
 {
         struct rtmsg *rtm;
         struct nlmsghdr *nlh;
         unsigned char *b = skb->tail;
 
-        nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
+        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
         rtm = NLMSG_DATA(nlh);
         rtm->rtm_family = AF_INET;
         rtm->rtm_dst_len = r->r_dst_len;
@@ -414,20 +432,42 @@ rtattr_failure:
         return -1;
 }
 
+/* callers should hold rtnl semaphore */
+
+static void rtmsg_rule(int event, struct fib_rule *r)
+{
+        int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128);
+        struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+
+        if (!skb)
+                netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
+        else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
+                kfree_skb(skb);
+                netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
+        } else {
+                netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
+        }
+}
+
 int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
-        int idx;
+        int idx = 0;
         int s_idx = cb->args[0];
         struct fib_rule *r;
+        struct hlist_node *node;
+
+        rcu_read_lock();
+        hlist_for_each_entry(r, node, &fib_rules, hlist) {
 
-        read_lock(&fib_rules_lock);
-        for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
                 if (idx < s_idx)
                         continue;
-                if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
+                if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
+                                   cb->nlh->nlmsg_seq,
+                                   RTM_NEWRULE, NLM_F_MULTI) < 0)
                         break;
+                idx++;
         }
-        read_unlock(&fib_rules_lock);
+        rcu_read_unlock();
         cb->args[0] = idx;
 
         return skb->len;
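The new rtmsg_rule() helper is what makes rule changes visible to userspace: every successful inet_rtm_newrule()/inet_rtm_delrule() now broadcasts an RTM_NEWRULE or RTM_DELRULE message to the RTNLGRP_IPV4_RULE netlink group, and inet_fill_rule() takes an explicit pid/seq/event so the same function serves both dumps and notifications. A userspace sketch (not part of the patch) that watches those events by joining the group on an rtnetlink socket; newer iproute2 releases expose roughly the same stream via "ip monitor rule":

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
        char buf[4096];
        struct sockaddr_nl sa;
        int group = RTNLGRP_IPV4_RULE;
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        memset(&sa, 0, sizeof(sa));
        sa.nl_family = AF_NETLINK;
        bind(fd, (struct sockaddr *)&sa, sizeof(sa));

        /* join the IPv4 rule multicast group */
        setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
                   &group, sizeof(group));

        for (;;) {
                int len = recv(fd, buf, sizeof(buf), 0);
                struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

                if (len < 0)
                        continue;
                for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len))
                        if (nlh->nlmsg_type == RTM_NEWRULE ||
                            nlh->nlmsg_type == RTM_DELRULE)
                                printf("rule %s\n",
                                       nlh->nlmsg_type == RTM_NEWRULE ?
                                       "added" : "deleted");
        }
        return 0;
}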
@@ -435,5 +475,9 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 
 void __init fib_rules_init(void)
 {
+        INIT_HLIST_HEAD(&fib_rules);
+        hlist_add_head(&local_rule.hlist, &fib_rules);
+        hlist_add_after(&local_rule.hlist, &main_rule.hlist);
+        hlist_add_after(&main_rule.hlist, &default_rule.hlist);
         register_netdevice_notifier(&fib_rules_notifier);
 }