diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/ipv4/fib_semantics.c |
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/ipv4/fib_semantics.c')
-rw-r--r-- | net/ipv4/fib_semantics.c | 1332 |
1 files changed, 1332 insertions, 0 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c new file mode 100644 index 00000000000..029362d6613 --- /dev/null +++ b/net/ipv4/fib_semantics.c | |||
@@ -0,0 +1,1332 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IPv4 Forwarding Information Base: semantics. | ||
7 | * | ||
8 | * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ | ||
9 | * | ||
10 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | */ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | #include <asm/uaccess.h> | ||
20 | #include <asm/system.h> | ||
21 | #include <linux/bitops.h> | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/jiffies.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <linux/socket.h> | ||
28 | #include <linux/sockios.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/in.h> | ||
31 | #include <linux/inet.h> | ||
32 | #include <linux/netdevice.h> | ||
33 | #include <linux/if_arp.h> | ||
34 | #include <linux/proc_fs.h> | ||
35 | #include <linux/skbuff.h> | ||
36 | #include <linux/netlink.h> | ||
37 | #include <linux/init.h> | ||
38 | |||
39 | #include <net/ip.h> | ||
40 | #include <net/protocol.h> | ||
41 | #include <net/route.h> | ||
42 | #include <net/tcp.h> | ||
43 | #include <net/sock.h> | ||
44 | #include <net/ip_fib.h> | ||
45 | #include <net/ip_mp_alg.h> | ||
46 | |||
47 | #include "fib_lookup.h" | ||
48 | |||
/* Debug trace hook: expands to nothing, so any FSprintk() call compiles away. */
49 | #define FSprintk(a...) | ||
50 | |||
/* Read/write lock taken around insertions into and removals from the hash tables below. */
51 | static DEFINE_RWLOCK(fib_info_lock); | ||
/* Table of fib_info records; the bucket is chosen by fib_info_hashfn(). */
52 | static struct hlist_head *fib_info_hash; | ||
/* Table of fib_info records that have a fib_prefsrc; the bucket is chosen by fib_laddr_hashfn(). */
53 | static struct hlist_head *fib_info_laddrhash; | ||
/* Bucket count shared by the two tables above; the hash functions mask with (fib_hash_size - 1). */
54 | static unsigned int fib_hash_size; | ||
/* Number of fib_info records currently allocated (bumped in fib_create_info(), dropped in free_fib_info()). */
55 | static unsigned int fib_info_cnt; | ||
56 | |||
/* Fixed-size table of nexthops hashed by device ifindex, see fib_devindex_hashfn(). */
57 | #define DEVINDEX_HASHBITS 8 | ||
58 | #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) | ||
59 | static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; | ||
60 | |||
61 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
62 | |||
63 | static DEFINE_SPINLOCK(fib_multipath_lock); | ||
64 | |||
/*
 * for_nexthops(fi): opens a brace, declares `nhsel` (index) and a const
 * `nh` (current nexthop), then starts a for loop over the fi->fib_nhs
 * entries of fi->fib_nh.  The brace must be closed with endfor_nexthops(fi).
 */
65 | #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ | ||
66 | for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) | ||
67 | |||
/* change_nexthops(fi): same iteration, but `nh` is a non-const pointer so the body may modify the nexthop. */
68 | #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ | ||
69 | for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) | ||
70 | |||
71 | #else /* CONFIG_IP_ROUTE_MULTIPATH */ | ||
72 | |||
73 | /* Hope, that gcc will optimize it to get rid of dummy loop */ | ||
74 | |||
/*
 * Without multipath support the loop body runs exactly once with
 * nh == fi->fib_nh; `nh` is not advanced and fi->fib_nhs is not consulted.
 */
75 | #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ | ||
76 | for (nhsel=0; nhsel < 1; nhsel++) | ||
77 | |||
78 | #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ | ||
79 | for (nhsel=0; nhsel < 1; nhsel++) | ||
80 | |||
81 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ | ||
82 | |||
/* Closes the brace opened by for_nexthops()/change_nexthops(); `nhsel` and `nh` go out of scope here. */
83 | #define endfor_nexthops(fi) } | ||
84 | |||
85 | |||
86 | static struct | ||
87 | { | ||
88 | int error; | ||
89 | u8 scope; | ||
90 | } fib_props[RTA_MAX + 1] = { | ||
91 | { | ||
92 | .error = 0, | ||
93 | .scope = RT_SCOPE_NOWHERE, | ||
94 | }, /* RTN_UNSPEC */ | ||
95 | { | ||
96 | .error = 0, | ||
97 | .scope = RT_SCOPE_UNIVERSE, | ||
98 | }, /* RTN_UNICAST */ | ||
99 | { | ||
100 | .error = 0, | ||
101 | .scope = RT_SCOPE_HOST, | ||
102 | }, /* RTN_LOCAL */ | ||
103 | { | ||
104 | .error = 0, | ||
105 | .scope = RT_SCOPE_LINK, | ||
106 | }, /* RTN_BROADCAST */ | ||
107 | { | ||
108 | .error = 0, | ||
109 | .scope = RT_SCOPE_LINK, | ||
110 | }, /* RTN_ANYCAST */ | ||
111 | { | ||
112 | .error = 0, | ||
113 | .scope = RT_SCOPE_UNIVERSE, | ||
114 | }, /* RTN_MULTICAST */ | ||
115 | { | ||
116 | .error = -EINVAL, | ||
117 | .scope = RT_SCOPE_UNIVERSE, | ||
118 | }, /* RTN_BLACKHOLE */ | ||
119 | { | ||
120 | .error = -EHOSTUNREACH, | ||
121 | .scope = RT_SCOPE_UNIVERSE, | ||
122 | }, /* RTN_UNREACHABLE */ | ||
123 | { | ||
124 | .error = -EACCES, | ||
125 | .scope = RT_SCOPE_UNIVERSE, | ||
126 | }, /* RTN_PROHIBIT */ | ||
127 | { | ||
128 | .error = -EAGAIN, | ||
129 | .scope = RT_SCOPE_UNIVERSE, | ||
130 | }, /* RTN_THROW */ | ||
131 | { | ||
132 | .error = -EINVAL, | ||
133 | .scope = RT_SCOPE_NOWHERE, | ||
134 | }, /* RTN_NAT */ | ||
135 | { | ||
136 | .error = -EINVAL, | ||
137 | .scope = RT_SCOPE_NOWHERE, | ||
138 | }, /* RTN_XRESOLVE */ | ||
139 | }; | ||
140 | |||
141 | |||
142 | /* Release a nexthop info record */ | ||
143 | |||
144 | void free_fib_info(struct fib_info *fi) | ||
145 | { | ||
146 | if (fi->fib_dead == 0) { | ||
147 | printk("Freeing alive fib_info %p\n", fi); | ||
148 | return; | ||
149 | } | ||
150 | change_nexthops(fi) { | ||
151 | if (nh->nh_dev) | ||
152 | dev_put(nh->nh_dev); | ||
153 | nh->nh_dev = NULL; | ||
154 | } endfor_nexthops(fi); | ||
155 | fib_info_cnt--; | ||
156 | kfree(fi); | ||
157 | } | ||
158 | |||
159 | void fib_release_info(struct fib_info *fi) | ||
160 | { | ||
161 | write_lock(&fib_info_lock); | ||
162 | if (fi && --fi->fib_treeref == 0) { | ||
163 | hlist_del(&fi->fib_hash); | ||
164 | if (fi->fib_prefsrc) | ||
165 | hlist_del(&fi->fib_lhash); | ||
166 | change_nexthops(fi) { | ||
167 | if (!nh->nh_dev) | ||
168 | continue; | ||
169 | hlist_del(&nh->nh_hash); | ||
170 | } endfor_nexthops(fi) | ||
171 | fi->fib_dead = 1; | ||
172 | fib_info_put(fi); | ||
173 | } | ||
174 | write_unlock(&fib_info_lock); | ||
175 | } | ||
176 | |||
177 | static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | ||
178 | { | ||
179 | const struct fib_nh *onh = ofi->fib_nh; | ||
180 | |||
181 | for_nexthops(fi) { | ||
182 | if (nh->nh_oif != onh->nh_oif || | ||
183 | nh->nh_gw != onh->nh_gw || | ||
184 | nh->nh_scope != onh->nh_scope || | ||
185 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
186 | nh->nh_weight != onh->nh_weight || | ||
187 | #endif | ||
188 | #ifdef CONFIG_NET_CLS_ROUTE | ||
189 | nh->nh_tclassid != onh->nh_tclassid || | ||
190 | #endif | ||
191 | ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) | ||
192 | return -1; | ||
193 | onh++; | ||
194 | } endfor_nexthops(fi); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) | ||
199 | { | ||
200 | unsigned int mask = (fib_hash_size - 1); | ||
201 | unsigned int val = fi->fib_nhs; | ||
202 | |||
203 | val ^= fi->fib_protocol; | ||
204 | val ^= fi->fib_prefsrc; | ||
205 | val ^= fi->fib_priority; | ||
206 | |||
207 | return (val ^ (val >> 7) ^ (val >> 12)) & mask; | ||
208 | } | ||
209 | |||
210 | static struct fib_info *fib_find_info(const struct fib_info *nfi) | ||
211 | { | ||
212 | struct hlist_head *head; | ||
213 | struct hlist_node *node; | ||
214 | struct fib_info *fi; | ||
215 | unsigned int hash; | ||
216 | |||
217 | hash = fib_info_hashfn(nfi); | ||
218 | head = &fib_info_hash[hash]; | ||
219 | |||
220 | hlist_for_each_entry(fi, node, head, fib_hash) { | ||
221 | if (fi->fib_nhs != nfi->fib_nhs) | ||
222 | continue; | ||
223 | if (nfi->fib_protocol == fi->fib_protocol && | ||
224 | nfi->fib_prefsrc == fi->fib_prefsrc && | ||
225 | nfi->fib_priority == fi->fib_priority && | ||
226 | memcmp(nfi->fib_metrics, fi->fib_metrics, | ||
227 | sizeof(fi->fib_metrics)) == 0 && | ||
228 | ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && | ||
229 | (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) | ||
230 | return fi; | ||
231 | } | ||
232 | |||
233 | return NULL; | ||
234 | } | ||
235 | |||
236 | static inline unsigned int fib_devindex_hashfn(unsigned int val) | ||
237 | { | ||
238 | unsigned int mask = DEVINDEX_HASHSIZE - 1; | ||
239 | |||
240 | return (val ^ | ||
241 | (val >> DEVINDEX_HASHBITS) ^ | ||
242 | (val >> (DEVINDEX_HASHBITS * 2))) & mask; | ||
243 | } | ||
244 | |||
245 | /* Check, that the gateway is already configured. | ||
246 | Used only by redirect accept routine. | ||
247 | */ | ||
248 | |||
249 | int ip_fib_check_default(u32 gw, struct net_device *dev) | ||
250 | { | ||
251 | struct hlist_head *head; | ||
252 | struct hlist_node *node; | ||
253 | struct fib_nh *nh; | ||
254 | unsigned int hash; | ||
255 | |||
256 | read_lock(&fib_info_lock); | ||
257 | |||
258 | hash = fib_devindex_hashfn(dev->ifindex); | ||
259 | head = &fib_info_devhash[hash]; | ||
260 | hlist_for_each_entry(nh, node, head, nh_hash) { | ||
261 | if (nh->nh_dev == dev && | ||
262 | nh->nh_gw == gw && | ||
263 | !(nh->nh_flags&RTNH_F_DEAD)) { | ||
264 | read_unlock(&fib_info_lock); | ||
265 | return 0; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | read_unlock(&fib_info_lock); | ||
270 | |||
271 | return -1; | ||
272 | } | ||
273 | |||
274 | void rtmsg_fib(int event, u32 key, struct fib_alias *fa, | ||
275 | int z, int tb_id, | ||
276 | struct nlmsghdr *n, struct netlink_skb_parms *req) | ||
277 | { | ||
278 | struct sk_buff *skb; | ||
279 | u32 pid = req ? req->pid : 0; | ||
280 | int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); | ||
281 | |||
282 | skb = alloc_skb(size, GFP_KERNEL); | ||
283 | if (!skb) | ||
284 | return; | ||
285 | |||
286 | if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, | ||
287 | fa->fa_type, fa->fa_scope, &key, z, | ||
288 | fa->fa_tos, | ||
289 | fa->fa_info) < 0) { | ||
290 | kfree_skb(skb); | ||
291 | return; | ||
292 | } | ||
293 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; | ||
294 | if (n->nlmsg_flags&NLM_F_ECHO) | ||
295 | atomic_inc(&skb->users); | ||
296 | netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); | ||
297 | if (n->nlmsg_flags&NLM_F_ECHO) | ||
298 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | ||
299 | } | ||
300 | |||
301 | /* Return the first fib alias matching TOS with | ||
302 | * priority less than or equal to PRIO. | ||
303 | */ | ||
304 | struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) | ||
305 | { | ||
306 | if (fah) { | ||
307 | struct fib_alias *fa; | ||
308 | list_for_each_entry(fa, fah, fa_list) { | ||
309 | if (fa->fa_tos > tos) | ||
310 | continue; | ||
311 | if (fa->fa_info->fib_priority >= prio || | ||
312 | fa->fa_tos < tos) | ||
313 | return fa; | ||
314 | } | ||
315 | } | ||
316 | return NULL; | ||
317 | } | ||
318 | |||
319 | int fib_detect_death(struct fib_info *fi, int order, | ||
320 | struct fib_info **last_resort, int *last_idx, int *dflt) | ||
321 | { | ||
322 | struct neighbour *n; | ||
323 | int state = NUD_NONE; | ||
324 | |||
325 | n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); | ||
326 | if (n) { | ||
327 | state = n->nud_state; | ||
328 | neigh_release(n); | ||
329 | } | ||
330 | if (state==NUD_REACHABLE) | ||
331 | return 0; | ||
332 | if ((state&NUD_VALID) && order != *dflt) | ||
333 | return 0; | ||
334 | if ((state&NUD_VALID) || | ||
335 | (*last_idx<0 && order > *dflt)) { | ||
336 | *last_resort = fi; | ||
337 | *last_idx = order; | ||
338 | } | ||
339 | return 1; | ||
340 | } | ||
341 | |||
342 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
343 | |||
344 | static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) | ||
345 | { | ||
346 | while (RTA_OK(attr,attrlen)) { | ||
347 | if (attr->rta_type == type) | ||
348 | return *(u32*)RTA_DATA(attr); | ||
349 | attr = RTA_NEXT(attr, attrlen); | ||
350 | } | ||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | static int | ||
355 | fib_count_nexthops(struct rtattr *rta) | ||
356 | { | ||
357 | int nhs = 0; | ||
358 | struct rtnexthop *nhp = RTA_DATA(rta); | ||
359 | int nhlen = RTA_PAYLOAD(rta); | ||
360 | |||
361 | while (nhlen >= (int)sizeof(struct rtnexthop)) { | ||
362 | if ((nhlen -= nhp->rtnh_len) < 0) | ||
363 | return 0; | ||
364 | nhs++; | ||
365 | nhp = RTNH_NEXT(nhp); | ||
366 | }; | ||
367 | return nhs; | ||
368 | } | ||
369 | |||
370 | static int | ||
371 | fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) | ||
372 | { | ||
373 | struct rtnexthop *nhp = RTA_DATA(rta); | ||
374 | int nhlen = RTA_PAYLOAD(rta); | ||
375 | |||
376 | change_nexthops(fi) { | ||
377 | int attrlen = nhlen - sizeof(struct rtnexthop); | ||
378 | if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) | ||
379 | return -EINVAL; | ||
380 | nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; | ||
381 | nh->nh_oif = nhp->rtnh_ifindex; | ||
382 | nh->nh_weight = nhp->rtnh_hops + 1; | ||
383 | if (attrlen) { | ||
384 | nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); | ||
385 | #ifdef CONFIG_NET_CLS_ROUTE | ||
386 | nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); | ||
387 | #endif | ||
388 | } | ||
389 | nhp = RTNH_NEXT(nhp); | ||
390 | } endfor_nexthops(fi); | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | #endif | ||
395 | |||
396 | int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, | ||
397 | struct fib_info *fi) | ||
398 | { | ||
399 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
400 | struct rtnexthop *nhp; | ||
401 | int nhlen; | ||
402 | #endif | ||
403 | |||
404 | if (rta->rta_priority && | ||
405 | *rta->rta_priority != fi->fib_priority) | ||
406 | return 1; | ||
407 | |||
408 | if (rta->rta_oif || rta->rta_gw) { | ||
409 | if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && | ||
410 | (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) | ||
411 | return 0; | ||
412 | return 1; | ||
413 | } | ||
414 | |||
415 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
416 | if (rta->rta_mp == NULL) | ||
417 | return 0; | ||
418 | nhp = RTA_DATA(rta->rta_mp); | ||
419 | nhlen = RTA_PAYLOAD(rta->rta_mp); | ||
420 | |||
421 | for_nexthops(fi) { | ||
422 | int attrlen = nhlen - sizeof(struct rtnexthop); | ||
423 | u32 gw; | ||
424 | |||
425 | if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) | ||
426 | return -EINVAL; | ||
427 | if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) | ||
428 | return 1; | ||
429 | if (attrlen) { | ||
430 | gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); | ||
431 | if (gw && gw != nh->nh_gw) | ||
432 | return 1; | ||
433 | #ifdef CONFIG_NET_CLS_ROUTE | ||
434 | gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); | ||
435 | if (gw && gw != nh->nh_tclassid) | ||
436 | return 1; | ||
437 | #endif | ||
438 | } | ||
439 | nhp = RTNH_NEXT(nhp); | ||
440 | } endfor_nexthops(fi); | ||
441 | #endif | ||
442 | return 0; | ||
443 | } | ||
444 | |||
445 | |||
446 | /* | ||
447 | Picture | ||
448 | ------- | ||
449 | |||
450 | Semantics of nexthop are very messy for historical reasons. | ||
451 | We have to take into account, that: | ||
452 | a) gateway can be actually local interface address, | ||
453 | so that gatewayed route is direct. | ||
454 | b) gateway must be on-link address, possibly | ||
455 | described not by an ifaddr, but also by a direct route. | ||
456 | c) If both gateway and interface are specified, they should not | ||
457 | contradict. | ||
458 | d) If we use tunnel routes, gateway could be not on-link. | ||
459 | |||
460 | Attempt to reconcile all of these (alas, self-contradictory) conditions | ||
461 | results in pretty ugly and hairy code with obscure logic. | ||
462 | |||
463 | I chose to generalize it instead, so that the size | ||
464 | of code does not increase practically, but it becomes | ||
465 | much more general. | ||
466 | Every prefix is assigned a "scope" value: "host" is local address, | ||
467 | "link" is direct route, | ||
468 | [ ... "site" ... "interior" ... ] | ||
469 | and "universe" is true gateway route with global meaning. | ||
470 | |||
471 | Every prefix refers to a set of "nexthop"s (gw, oif), | ||
472 | where gw must have narrower scope. This recursion stops | ||
473 | when gw has LOCAL scope or if "nexthop" is declared ONLINK, | ||
474 | which means that gw is forced to be on link. | ||
475 | |||
476 | Code is still hairy, but now it is apparently logically | ||
477 | consistent and very flexible. E.g., as a by-product it allows | ||
478 | independent exterior and interior routing processes | ||
479 | to coexist in peace. | ||
480 | |||
481 | Normally it looks as following. | ||
482 | |||
483 | {universe prefix} -> (gw, oif) [scope link] | ||
484 | | | ||
485 | |-> {link prefix} -> (gw, oif) [scope local] | ||
486 | | | ||
487 | |-> {local prefix} (terminal node) | ||
488 | */ | ||
489 | |||
490 | static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) | ||
491 | { | ||
492 | int err; | ||
493 | |||
494 | if (nh->nh_gw) { | ||
495 | struct fib_result res; | ||
496 | |||
497 | #ifdef CONFIG_IP_ROUTE_PERVASIVE | ||
498 | if (nh->nh_flags&RTNH_F_PERVASIVE) | ||
499 | return 0; | ||
500 | #endif | ||
501 | if (nh->nh_flags&RTNH_F_ONLINK) { | ||
502 | struct net_device *dev; | ||
503 | |||
504 | if (r->rtm_scope >= RT_SCOPE_LINK) | ||
505 | return -EINVAL; | ||
506 | if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) | ||
507 | return -EINVAL; | ||
508 | if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) | ||
509 | return -ENODEV; | ||
510 | if (!(dev->flags&IFF_UP)) | ||
511 | return -ENETDOWN; | ||
512 | nh->nh_dev = dev; | ||
513 | dev_hold(dev); | ||
514 | nh->nh_scope = RT_SCOPE_LINK; | ||
515 | return 0; | ||
516 | } | ||
517 | { | ||
518 | struct flowi fl = { .nl_u = { .ip4_u = | ||
519 | { .daddr = nh->nh_gw, | ||
520 | .scope = r->rtm_scope + 1 } }, | ||
521 | .oif = nh->nh_oif }; | ||
522 | |||
523 | /* It is not necessary, but requires a bit of thinking */ | ||
524 | if (fl.fl4_scope < RT_SCOPE_LINK) | ||
525 | fl.fl4_scope = RT_SCOPE_LINK; | ||
526 | if ((err = fib_lookup(&fl, &res)) != 0) | ||
527 | return err; | ||
528 | } | ||
529 | err = -EINVAL; | ||
530 | if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | ||
531 | goto out; | ||
532 | nh->nh_scope = res.scope; | ||
533 | nh->nh_oif = FIB_RES_OIF(res); | ||
534 | if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | ||
535 | goto out; | ||
536 | dev_hold(nh->nh_dev); | ||
537 | err = -ENETDOWN; | ||
538 | if (!(nh->nh_dev->flags & IFF_UP)) | ||
539 | goto out; | ||
540 | err = 0; | ||
541 | out: | ||
542 | fib_res_put(&res); | ||
543 | return err; | ||
544 | } else { | ||
545 | struct in_device *in_dev; | ||
546 | |||
547 | if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) | ||
548 | return -EINVAL; | ||
549 | |||
550 | in_dev = inetdev_by_index(nh->nh_oif); | ||
551 | if (in_dev == NULL) | ||
552 | return -ENODEV; | ||
553 | if (!(in_dev->dev->flags&IFF_UP)) { | ||
554 | in_dev_put(in_dev); | ||
555 | return -ENETDOWN; | ||
556 | } | ||
557 | nh->nh_dev = in_dev->dev; | ||
558 | dev_hold(nh->nh_dev); | ||
559 | nh->nh_scope = RT_SCOPE_HOST; | ||
560 | in_dev_put(in_dev); | ||
561 | } | ||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | static inline unsigned int fib_laddr_hashfn(u32 val) | ||
566 | { | ||
567 | unsigned int mask = (fib_hash_size - 1); | ||
568 | |||
569 | return (val ^ (val >> 7) ^ (val >> 14)) & mask; | ||
570 | } | ||
571 | |||
572 | static struct hlist_head *fib_hash_alloc(int bytes) | ||
573 | { | ||
574 | if (bytes <= PAGE_SIZE) | ||
575 | return kmalloc(bytes, GFP_KERNEL); | ||
576 | else | ||
577 | return (struct hlist_head *) | ||
578 | __get_free_pages(GFP_KERNEL, get_order(bytes)); | ||
579 | } | ||
580 | |||
581 | static void fib_hash_free(struct hlist_head *hash, int bytes) | ||
582 | { | ||
583 | if (!hash) | ||
584 | return; | ||
585 | |||
586 | if (bytes <= PAGE_SIZE) | ||
587 | kfree(hash); | ||
588 | else | ||
589 | free_pages((unsigned long) hash, get_order(bytes)); | ||
590 | } | ||
591 | |||
592 | static void fib_hash_move(struct hlist_head *new_info_hash, | ||
593 | struct hlist_head *new_laddrhash, | ||
594 | unsigned int new_size) | ||
595 | { | ||
596 | unsigned int old_size = fib_hash_size; | ||
597 | unsigned int i; | ||
598 | |||
599 | write_lock(&fib_info_lock); | ||
600 | fib_hash_size = new_size; | ||
601 | |||
602 | for (i = 0; i < old_size; i++) { | ||
603 | struct hlist_head *head = &fib_info_hash[i]; | ||
604 | struct hlist_node *node, *n; | ||
605 | struct fib_info *fi; | ||
606 | |||
607 | hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { | ||
608 | struct hlist_head *dest; | ||
609 | unsigned int new_hash; | ||
610 | |||
611 | hlist_del(&fi->fib_hash); | ||
612 | |||
613 | new_hash = fib_info_hashfn(fi); | ||
614 | dest = &new_info_hash[new_hash]; | ||
615 | hlist_add_head(&fi->fib_hash, dest); | ||
616 | } | ||
617 | } | ||
618 | fib_info_hash = new_info_hash; | ||
619 | |||
620 | for (i = 0; i < old_size; i++) { | ||
621 | struct hlist_head *lhead = &fib_info_laddrhash[i]; | ||
622 | struct hlist_node *node, *n; | ||
623 | struct fib_info *fi; | ||
624 | |||
625 | hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { | ||
626 | struct hlist_head *ldest; | ||
627 | unsigned int new_hash; | ||
628 | |||
629 | hlist_del(&fi->fib_lhash); | ||
630 | |||
631 | new_hash = fib_laddr_hashfn(fi->fib_prefsrc); | ||
632 | ldest = &new_laddrhash[new_hash]; | ||
633 | hlist_add_head(&fi->fib_lhash, ldest); | ||
634 | } | ||
635 | } | ||
636 | fib_info_laddrhash = new_laddrhash; | ||
637 | |||
638 | write_unlock(&fib_info_lock); | ||
639 | } | ||
640 | |||
641 | struct fib_info * | ||
642 | fib_create_info(const struct rtmsg *r, struct kern_rta *rta, | ||
643 | const struct nlmsghdr *nlh, int *errp) | ||
644 | { | ||
645 | int err; | ||
646 | struct fib_info *fi = NULL; | ||
647 | struct fib_info *ofi; | ||
648 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
649 | int nhs = 1; | ||
650 | #else | ||
651 | const int nhs = 1; | ||
652 | #endif | ||
653 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
654 | u32 mp_alg = IP_MP_ALG_NONE; | ||
655 | #endif | ||
656 | |||
657 | /* Fast check to catch the most weird cases */ | ||
658 | if (fib_props[r->rtm_type].scope > r->rtm_scope) | ||
659 | goto err_inval; | ||
660 | |||
661 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
662 | if (rta->rta_mp) { | ||
663 | nhs = fib_count_nexthops(rta->rta_mp); | ||
664 | if (nhs == 0) | ||
665 | goto err_inval; | ||
666 | } | ||
667 | #endif | ||
668 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
669 | if (rta->rta_mp_alg) { | ||
670 | mp_alg = *rta->rta_mp_alg; | ||
671 | |||
672 | if (mp_alg < IP_MP_ALG_NONE || | ||
673 | mp_alg > IP_MP_ALG_MAX) | ||
674 | goto err_inval; | ||
675 | } | ||
676 | #endif | ||
677 | |||
678 | err = -ENOBUFS; | ||
679 | if (fib_info_cnt >= fib_hash_size) { | ||
680 | unsigned int new_size = fib_hash_size << 1; | ||
681 | struct hlist_head *new_info_hash; | ||
682 | struct hlist_head *new_laddrhash; | ||
683 | unsigned int bytes; | ||
684 | |||
685 | if (!new_size) | ||
686 | new_size = 1; | ||
687 | bytes = new_size * sizeof(struct hlist_head *); | ||
688 | new_info_hash = fib_hash_alloc(bytes); | ||
689 | new_laddrhash = fib_hash_alloc(bytes); | ||
690 | if (!new_info_hash || !new_laddrhash) { | ||
691 | fib_hash_free(new_info_hash, bytes); | ||
692 | fib_hash_free(new_laddrhash, bytes); | ||
693 | } else { | ||
694 | memset(new_info_hash, 0, bytes); | ||
695 | memset(new_laddrhash, 0, bytes); | ||
696 | |||
697 | fib_hash_move(new_info_hash, new_laddrhash, new_size); | ||
698 | } | ||
699 | |||
700 | if (!fib_hash_size) | ||
701 | goto failure; | ||
702 | } | ||
703 | |||
704 | fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); | ||
705 | if (fi == NULL) | ||
706 | goto failure; | ||
707 | fib_info_cnt++; | ||
708 | memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); | ||
709 | |||
710 | fi->fib_protocol = r->rtm_protocol; | ||
711 | |||
712 | fi->fib_nhs = nhs; | ||
713 | change_nexthops(fi) { | ||
714 | nh->nh_parent = fi; | ||
715 | } endfor_nexthops(fi) | ||
716 | |||
717 | fi->fib_flags = r->rtm_flags; | ||
718 | if (rta->rta_priority) | ||
719 | fi->fib_priority = *rta->rta_priority; | ||
720 | if (rta->rta_mx) { | ||
721 | int attrlen = RTA_PAYLOAD(rta->rta_mx); | ||
722 | struct rtattr *attr = RTA_DATA(rta->rta_mx); | ||
723 | |||
724 | while (RTA_OK(attr, attrlen)) { | ||
725 | unsigned flavor = attr->rta_type; | ||
726 | if (flavor) { | ||
727 | if (flavor > RTAX_MAX) | ||
728 | goto err_inval; | ||
729 | fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); | ||
730 | } | ||
731 | attr = RTA_NEXT(attr, attrlen); | ||
732 | } | ||
733 | } | ||
734 | if (rta->rta_prefsrc) | ||
735 | memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); | ||
736 | |||
737 | if (rta->rta_mp) { | ||
738 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
739 | if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) | ||
740 | goto failure; | ||
741 | if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) | ||
742 | goto err_inval; | ||
743 | if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) | ||
744 | goto err_inval; | ||
745 | #ifdef CONFIG_NET_CLS_ROUTE | ||
746 | if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) | ||
747 | goto err_inval; | ||
748 | #endif | ||
749 | #else | ||
750 | goto err_inval; | ||
751 | #endif | ||
752 | } else { | ||
753 | struct fib_nh *nh = fi->fib_nh; | ||
754 | if (rta->rta_oif) | ||
755 | nh->nh_oif = *rta->rta_oif; | ||
756 | if (rta->rta_gw) | ||
757 | memcpy(&nh->nh_gw, rta->rta_gw, 4); | ||
758 | #ifdef CONFIG_NET_CLS_ROUTE | ||
759 | if (rta->rta_flow) | ||
760 | memcpy(&nh->nh_tclassid, rta->rta_flow, 4); | ||
761 | #endif | ||
762 | nh->nh_flags = r->rtm_flags; | ||
763 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
764 | nh->nh_weight = 1; | ||
765 | #endif | ||
766 | } | ||
767 | |||
768 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
769 | fi->fib_mp_alg = mp_alg; | ||
770 | #endif | ||
771 | |||
772 | if (fib_props[r->rtm_type].error) { | ||
773 | if (rta->rta_gw || rta->rta_oif || rta->rta_mp) | ||
774 | goto err_inval; | ||
775 | goto link_it; | ||
776 | } | ||
777 | |||
778 | if (r->rtm_scope > RT_SCOPE_HOST) | ||
779 | goto err_inval; | ||
780 | |||
781 | if (r->rtm_scope == RT_SCOPE_HOST) { | ||
782 | struct fib_nh *nh = fi->fib_nh; | ||
783 | |||
784 | /* Local address is added. */ | ||
785 | if (nhs != 1 || nh->nh_gw) | ||
786 | goto err_inval; | ||
787 | nh->nh_scope = RT_SCOPE_NOWHERE; | ||
788 | nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); | ||
789 | err = -ENODEV; | ||
790 | if (nh->nh_dev == NULL) | ||
791 | goto failure; | ||
792 | } else { | ||
793 | change_nexthops(fi) { | ||
794 | if ((err = fib_check_nh(r, fi, nh)) != 0) | ||
795 | goto failure; | ||
796 | } endfor_nexthops(fi) | ||
797 | } | ||
798 | |||
799 | if (fi->fib_prefsrc) { | ||
800 | if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || | ||
801 | memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) | ||
802 | if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) | ||
803 | goto err_inval; | ||
804 | } | ||
805 | |||
806 | link_it: | ||
807 | if ((ofi = fib_find_info(fi)) != NULL) { | ||
808 | fi->fib_dead = 1; | ||
809 | free_fib_info(fi); | ||
810 | ofi->fib_treeref++; | ||
811 | return ofi; | ||
812 | } | ||
813 | |||
814 | fi->fib_treeref++; | ||
815 | atomic_inc(&fi->fib_clntref); | ||
816 | write_lock(&fib_info_lock); | ||
817 | hlist_add_head(&fi->fib_hash, | ||
818 | &fib_info_hash[fib_info_hashfn(fi)]); | ||
819 | if (fi->fib_prefsrc) { | ||
820 | struct hlist_head *head; | ||
821 | |||
822 | head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; | ||
823 | hlist_add_head(&fi->fib_lhash, head); | ||
824 | } | ||
825 | change_nexthops(fi) { | ||
826 | struct hlist_head *head; | ||
827 | unsigned int hash; | ||
828 | |||
829 | if (!nh->nh_dev) | ||
830 | continue; | ||
831 | hash = fib_devindex_hashfn(nh->nh_dev->ifindex); | ||
832 | head = &fib_info_devhash[hash]; | ||
833 | hlist_add_head(&nh->nh_hash, head); | ||
834 | } endfor_nexthops(fi) | ||
835 | write_unlock(&fib_info_lock); | ||
836 | return fi; | ||
837 | |||
838 | err_inval: | ||
839 | err = -EINVAL; | ||
840 | |||
841 | failure: | ||
842 | *errp = err; | ||
843 | if (fi) { | ||
844 | fi->fib_dead = 1; | ||
845 | free_fib_info(fi); | ||
846 | } | ||
847 | return NULL; | ||
848 | } | ||
849 | |||
850 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, | ||
851 | struct fib_result *res, __u32 zone, __u32 mask, | ||
852 | int prefixlen) | ||
853 | { | ||
854 | struct fib_alias *fa; | ||
855 | int nh_sel = 0; | ||
856 | |||
857 | list_for_each_entry(fa, head, fa_list) { | ||
858 | int err; | ||
859 | |||
860 | if (fa->fa_tos && | ||
861 | fa->fa_tos != flp->fl4_tos) | ||
862 | continue; | ||
863 | |||
864 | if (fa->fa_scope < flp->fl4_scope) | ||
865 | continue; | ||
866 | |||
867 | fa->fa_state |= FA_S_ACCESSED; | ||
868 | |||
869 | err = fib_props[fa->fa_type].error; | ||
870 | if (err == 0) { | ||
871 | struct fib_info *fi = fa->fa_info; | ||
872 | |||
873 | if (fi->fib_flags & RTNH_F_DEAD) | ||
874 | continue; | ||
875 | |||
876 | switch (fa->fa_type) { | ||
877 | case RTN_UNICAST: | ||
878 | case RTN_LOCAL: | ||
879 | case RTN_BROADCAST: | ||
880 | case RTN_ANYCAST: | ||
881 | case RTN_MULTICAST: | ||
882 | for_nexthops(fi) { | ||
883 | if (nh->nh_flags&RTNH_F_DEAD) | ||
884 | continue; | ||
885 | if (!flp->oif || flp->oif == nh->nh_oif) | ||
886 | break; | ||
887 | } | ||
888 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
889 | if (nhsel < fi->fib_nhs) { | ||
890 | nh_sel = nhsel; | ||
891 | goto out_fill_res; | ||
892 | } | ||
893 | #else | ||
894 | if (nhsel < 1) { | ||
895 | goto out_fill_res; | ||
896 | } | ||
897 | #endif | ||
898 | endfor_nexthops(fi); | ||
899 | continue; | ||
900 | |||
901 | default: | ||
902 | printk(KERN_DEBUG "impossible 102\n"); | ||
903 | return -EINVAL; | ||
904 | }; | ||
905 | } | ||
906 | return err; | ||
907 | } | ||
908 | return 1; | ||
909 | |||
910 | out_fill_res: | ||
911 | res->prefixlen = prefixlen; | ||
912 | res->nh_sel = nh_sel; | ||
913 | res->type = fa->fa_type; | ||
914 | res->scope = fa->fa_scope; | ||
915 | res->fi = fa->fa_info; | ||
916 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
917 | res->netmask = mask; | ||
918 | res->network = zone & | ||
919 | (0xFFFFFFFF >> (32 - prefixlen)); | ||
920 | #endif | ||
921 | atomic_inc(&res->fi->fib_clntref); | ||
922 | return 0; | ||
923 | } | ||
924 | |||
925 | /* Find appropriate source address to this destination */ | ||
926 | |||
927 | u32 __fib_res_prefsrc(struct fib_result *res) | ||
928 | { | ||
929 | return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); | ||
930 | } | ||
931 | |||
932 | int | ||
933 | fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | ||
934 | u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, | ||
935 | struct fib_info *fi) | ||
936 | { | ||
937 | struct rtmsg *rtm; | ||
938 | struct nlmsghdr *nlh; | ||
939 | unsigned char *b = skb->tail; | ||
940 | |||
941 | nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); | ||
942 | rtm = NLMSG_DATA(nlh); | ||
943 | rtm->rtm_family = AF_INET; | ||
944 | rtm->rtm_dst_len = dst_len; | ||
945 | rtm->rtm_src_len = 0; | ||
946 | rtm->rtm_tos = tos; | ||
947 | rtm->rtm_table = tb_id; | ||
948 | rtm->rtm_type = type; | ||
949 | rtm->rtm_flags = fi->fib_flags; | ||
950 | rtm->rtm_scope = scope; | ||
951 | if (rtm->rtm_dst_len) | ||
952 | RTA_PUT(skb, RTA_DST, 4, dst); | ||
953 | rtm->rtm_protocol = fi->fib_protocol; | ||
954 | if (fi->fib_priority) | ||
955 | RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); | ||
956 | #ifdef CONFIG_NET_CLS_ROUTE | ||
957 | if (fi->fib_nh[0].nh_tclassid) | ||
958 | RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); | ||
959 | #endif | ||
960 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) | ||
961 | goto rtattr_failure; | ||
962 | if (fi->fib_prefsrc) | ||
963 | RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); | ||
964 | if (fi->fib_nhs == 1) { | ||
965 | if (fi->fib_nh->nh_gw) | ||
966 | RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); | ||
967 | if (fi->fib_nh->nh_oif) | ||
968 | RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); | ||
969 | } | ||
970 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
971 | if (fi->fib_nhs > 1) { | ||
972 | struct rtnexthop *nhp; | ||
973 | struct rtattr *mp_head; | ||
974 | if (skb_tailroom(skb) <= RTA_SPACE(0)) | ||
975 | goto rtattr_failure; | ||
976 | mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); | ||
977 | |||
978 | for_nexthops(fi) { | ||
979 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | ||
980 | goto rtattr_failure; | ||
981 | nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | ||
982 | nhp->rtnh_flags = nh->nh_flags & 0xFF; | ||
983 | nhp->rtnh_hops = nh->nh_weight-1; | ||
984 | nhp->rtnh_ifindex = nh->nh_oif; | ||
985 | if (nh->nh_gw) | ||
986 | RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); | ||
987 | nhp->rtnh_len = skb->tail - (unsigned char*)nhp; | ||
988 | } endfor_nexthops(fi); | ||
989 | mp_head->rta_type = RTA_MULTIPATH; | ||
990 | mp_head->rta_len = skb->tail - (u8*)mp_head; | ||
991 | } | ||
992 | #endif | ||
993 | nlh->nlmsg_len = skb->tail - b; | ||
994 | return skb->len; | ||
995 | |||
996 | nlmsg_failure: | ||
997 | rtattr_failure: | ||
998 | skb_trim(skb, b - skb->data); | ||
999 | return -1; | ||
1000 | } | ||
1001 | |||
1002 | #ifndef CONFIG_IP_NOSIOCRT | ||
1003 | |||
1004 | int | ||
1005 | fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, | ||
1006 | struct kern_rta *rta, struct rtentry *r) | ||
1007 | { | ||
1008 | int plen; | ||
1009 | u32 *ptr; | ||
1010 | |||
1011 | memset(rtm, 0, sizeof(*rtm)); | ||
1012 | memset(rta, 0, sizeof(*rta)); | ||
1013 | |||
1014 | if (r->rt_dst.sa_family != AF_INET) | ||
1015 | return -EAFNOSUPPORT; | ||
1016 | |||
1017 | /* Check mask for validity: | ||
1018 | a) it must be contiguous. | ||
1019 | b) destination must have all host bits clear. | ||
1020 | c) if application forgot to set correct family (AF_INET), | ||
1021 | reject request unless it is absolutely clear i.e. | ||
1022 | both family and mask are zero. | ||
1023 | */ | ||
1024 | plen = 32; | ||
1025 | ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; | ||
1026 | if (!(r->rt_flags&RTF_HOST)) { | ||
1027 | u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; | ||
1028 | if (r->rt_genmask.sa_family != AF_INET) { | ||
1029 | if (mask || r->rt_genmask.sa_family) | ||
1030 | return -EAFNOSUPPORT; | ||
1031 | } | ||
1032 | if (bad_mask(mask, *ptr)) | ||
1033 | return -EINVAL; | ||
1034 | plen = inet_mask_len(mask); | ||
1035 | } | ||
1036 | |||
1037 | nl->nlmsg_flags = NLM_F_REQUEST; | ||
1038 | nl->nlmsg_pid = 0; | ||
1039 | nl->nlmsg_seq = 0; | ||
1040 | nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); | ||
1041 | if (cmd == SIOCDELRT) { | ||
1042 | nl->nlmsg_type = RTM_DELROUTE; | ||
1043 | nl->nlmsg_flags = 0; | ||
1044 | } else { | ||
1045 | nl->nlmsg_type = RTM_NEWROUTE; | ||
1046 | nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; | ||
1047 | rtm->rtm_protocol = RTPROT_BOOT; | ||
1048 | } | ||
1049 | |||
1050 | rtm->rtm_dst_len = plen; | ||
1051 | rta->rta_dst = ptr; | ||
1052 | |||
1053 | if (r->rt_metric) { | ||
1054 | *(u32*)&r->rt_pad3 = r->rt_metric - 1; | ||
1055 | rta->rta_priority = (u32*)&r->rt_pad3; | ||
1056 | } | ||
1057 | if (r->rt_flags&RTF_REJECT) { | ||
1058 | rtm->rtm_scope = RT_SCOPE_HOST; | ||
1059 | rtm->rtm_type = RTN_UNREACHABLE; | ||
1060 | return 0; | ||
1061 | } | ||
1062 | rtm->rtm_scope = RT_SCOPE_NOWHERE; | ||
1063 | rtm->rtm_type = RTN_UNICAST; | ||
1064 | |||
1065 | if (r->rt_dev) { | ||
1066 | char *colon; | ||
1067 | struct net_device *dev; | ||
1068 | char devname[IFNAMSIZ]; | ||
1069 | |||
1070 | if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) | ||
1071 | return -EFAULT; | ||
1072 | devname[IFNAMSIZ-1] = 0; | ||
1073 | colon = strchr(devname, ':'); | ||
1074 | if (colon) | ||
1075 | *colon = 0; | ||
1076 | dev = __dev_get_by_name(devname); | ||
1077 | if (!dev) | ||
1078 | return -ENODEV; | ||
1079 | rta->rta_oif = &dev->ifindex; | ||
1080 | if (colon) { | ||
1081 | struct in_ifaddr *ifa; | ||
1082 | struct in_device *in_dev = __in_dev_get(dev); | ||
1083 | if (!in_dev) | ||
1084 | return -ENODEV; | ||
1085 | *colon = ':'; | ||
1086 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) | ||
1087 | if (strcmp(ifa->ifa_label, devname) == 0) | ||
1088 | break; | ||
1089 | if (ifa == NULL) | ||
1090 | return -ENODEV; | ||
1091 | rta->rta_prefsrc = &ifa->ifa_local; | ||
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; | ||
1096 | if (r->rt_gateway.sa_family == AF_INET && *ptr) { | ||
1097 | rta->rta_gw = ptr; | ||
1098 | if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) | ||
1099 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | ||
1100 | } | ||
1101 | |||
1102 | if (cmd == SIOCDELRT) | ||
1103 | return 0; | ||
1104 | |||
1105 | if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) | ||
1106 | return -EINVAL; | ||
1107 | |||
1108 | if (rtm->rtm_scope == RT_SCOPE_NOWHERE) | ||
1109 | rtm->rtm_scope = RT_SCOPE_LINK; | ||
1110 | |||
1111 | if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { | ||
1112 | struct rtattr *rec; | ||
1113 | struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); | ||
1114 | if (mx == NULL) | ||
1115 | return -ENOMEM; | ||
1116 | rta->rta_mx = mx; | ||
1117 | mx->rta_type = RTA_METRICS; | ||
1118 | mx->rta_len = RTA_LENGTH(0); | ||
1119 | if (r->rt_flags&RTF_MTU) { | ||
1120 | rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); | ||
1121 | rec->rta_type = RTAX_ADVMSS; | ||
1122 | rec->rta_len = RTA_LENGTH(4); | ||
1123 | mx->rta_len += RTA_LENGTH(4); | ||
1124 | *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; | ||
1125 | } | ||
1126 | if (r->rt_flags&RTF_WINDOW) { | ||
1127 | rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); | ||
1128 | rec->rta_type = RTAX_WINDOW; | ||
1129 | rec->rta_len = RTA_LENGTH(4); | ||
1130 | mx->rta_len += RTA_LENGTH(4); | ||
1131 | *(u32*)RTA_DATA(rec) = r->rt_window; | ||
1132 | } | ||
1133 | if (r->rt_flags&RTF_IRTT) { | ||
1134 | rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); | ||
1135 | rec->rta_type = RTAX_RTT; | ||
1136 | rec->rta_len = RTA_LENGTH(4); | ||
1137 | mx->rta_len += RTA_LENGTH(4); | ||
1138 | *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; | ||
1139 | } | ||
1140 | } | ||
1141 | return 0; | ||
1142 | } | ||
1143 | |||
1144 | #endif | ||
1145 | |||
1146 | /* | ||
1147 | Update FIB if: | ||
1148 | - local address disappeared -> we must delete all the entries | ||
1149 | referring to it. | ||
1150 | - device went down -> we must shutdown all nexthops going via it. | ||
1151 | */ | ||
1152 | |||
1153 | int fib_sync_down(u32 local, struct net_device *dev, int force) | ||
1154 | { | ||
1155 | int ret = 0; | ||
1156 | int scope = RT_SCOPE_NOWHERE; | ||
1157 | |||
1158 | if (force) | ||
1159 | scope = -1; | ||
1160 | |||
1161 | if (local && fib_info_laddrhash) { | ||
1162 | unsigned int hash = fib_laddr_hashfn(local); | ||
1163 | struct hlist_head *head = &fib_info_laddrhash[hash]; | ||
1164 | struct hlist_node *node; | ||
1165 | struct fib_info *fi; | ||
1166 | |||
1167 | hlist_for_each_entry(fi, node, head, fib_lhash) { | ||
1168 | if (fi->fib_prefsrc == local) { | ||
1169 | fi->fib_flags |= RTNH_F_DEAD; | ||
1170 | ret++; | ||
1171 | } | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | if (dev) { | ||
1176 | struct fib_info *prev_fi = NULL; | ||
1177 | unsigned int hash = fib_devindex_hashfn(dev->ifindex); | ||
1178 | struct hlist_head *head = &fib_info_devhash[hash]; | ||
1179 | struct hlist_node *node; | ||
1180 | struct fib_nh *nh; | ||
1181 | |||
1182 | hlist_for_each_entry(nh, node, head, nh_hash) { | ||
1183 | struct fib_info *fi = nh->nh_parent; | ||
1184 | int dead; | ||
1185 | |||
1186 | BUG_ON(!fi->fib_nhs); | ||
1187 | if (nh->nh_dev != dev || fi == prev_fi) | ||
1188 | continue; | ||
1189 | prev_fi = fi; | ||
1190 | dead = 0; | ||
1191 | change_nexthops(fi) { | ||
1192 | if (nh->nh_flags&RTNH_F_DEAD) | ||
1193 | dead++; | ||
1194 | else if (nh->nh_dev == dev && | ||
1195 | nh->nh_scope != scope) { | ||
1196 | nh->nh_flags |= RTNH_F_DEAD; | ||
1197 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
1198 | spin_lock_bh(&fib_multipath_lock); | ||
1199 | fi->fib_power -= nh->nh_power; | ||
1200 | nh->nh_power = 0; | ||
1201 | spin_unlock_bh(&fib_multipath_lock); | ||
1202 | #endif | ||
1203 | dead++; | ||
1204 | } | ||
1205 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
1206 | if (force > 1 && nh->nh_dev == dev) { | ||
1207 | dead = fi->fib_nhs; | ||
1208 | break; | ||
1209 | } | ||
1210 | #endif | ||
1211 | } endfor_nexthops(fi) | ||
1212 | if (dead == fi->fib_nhs) { | ||
1213 | fi->fib_flags |= RTNH_F_DEAD; | ||
1214 | ret++; | ||
1215 | } | ||
1216 | } | ||
1217 | } | ||
1218 | |||
1219 | return ret; | ||
1220 | } | ||
1221 | |||
1222 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
1223 | |||
1224 | /* | ||
1225 | Dead device goes up. We wake up dead nexthops. | ||
1226 | It takes sense only on multipath routes. | ||
1227 | */ | ||
1228 | |||
1229 | int fib_sync_up(struct net_device *dev) | ||
1230 | { | ||
1231 | struct fib_info *prev_fi; | ||
1232 | unsigned int hash; | ||
1233 | struct hlist_head *head; | ||
1234 | struct hlist_node *node; | ||
1235 | struct fib_nh *nh; | ||
1236 | int ret; | ||
1237 | |||
1238 | if (!(dev->flags&IFF_UP)) | ||
1239 | return 0; | ||
1240 | |||
1241 | prev_fi = NULL; | ||
1242 | hash = fib_devindex_hashfn(dev->ifindex); | ||
1243 | head = &fib_info_devhash[hash]; | ||
1244 | ret = 0; | ||
1245 | |||
1246 | hlist_for_each_entry(nh, node, head, nh_hash) { | ||
1247 | struct fib_info *fi = nh->nh_parent; | ||
1248 | int alive; | ||
1249 | |||
1250 | BUG_ON(!fi->fib_nhs); | ||
1251 | if (nh->nh_dev != dev || fi == prev_fi) | ||
1252 | continue; | ||
1253 | |||
1254 | prev_fi = fi; | ||
1255 | alive = 0; | ||
1256 | change_nexthops(fi) { | ||
1257 | if (!(nh->nh_flags&RTNH_F_DEAD)) { | ||
1258 | alive++; | ||
1259 | continue; | ||
1260 | } | ||
1261 | if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) | ||
1262 | continue; | ||
1263 | if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) | ||
1264 | continue; | ||
1265 | alive++; | ||
1266 | spin_lock_bh(&fib_multipath_lock); | ||
1267 | nh->nh_power = 0; | ||
1268 | nh->nh_flags &= ~RTNH_F_DEAD; | ||
1269 | spin_unlock_bh(&fib_multipath_lock); | ||
1270 | } endfor_nexthops(fi) | ||
1271 | |||
1272 | if (alive > 0) { | ||
1273 | fi->fib_flags &= ~RTNH_F_DEAD; | ||
1274 | ret++; | ||
1275 | } | ||
1276 | } | ||
1277 | |||
1278 | return ret; | ||
1279 | } | ||
1280 | |||
1281 | /* | ||
1282 | The algorithm is suboptimal, but it provides really | ||
1283 | fair weighted route distribution. | ||
1284 | */ | ||
1285 | |||
1286 | void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | ||
1287 | { | ||
1288 | struct fib_info *fi = res->fi; | ||
1289 | int w; | ||
1290 | |||
1291 | spin_lock_bh(&fib_multipath_lock); | ||
1292 | if (fi->fib_power <= 0) { | ||
1293 | int power = 0; | ||
1294 | change_nexthops(fi) { | ||
1295 | if (!(nh->nh_flags&RTNH_F_DEAD)) { | ||
1296 | power += nh->nh_weight; | ||
1297 | nh->nh_power = nh->nh_weight; | ||
1298 | } | ||
1299 | } endfor_nexthops(fi); | ||
1300 | fi->fib_power = power; | ||
1301 | if (power <= 0) { | ||
1302 | spin_unlock_bh(&fib_multipath_lock); | ||
1303 | /* Race condition: route has just become dead. */ | ||
1304 | res->nh_sel = 0; | ||
1305 | return; | ||
1306 | } | ||
1307 | } | ||
1308 | |||
1309 | |||
1310 | /* w should be random number [0..fi->fib_power-1], | ||
1311 | it is pretty bad approximation. | ||
1312 | */ | ||
1313 | |||
1314 | w = jiffies % fi->fib_power; | ||
1315 | |||
1316 | change_nexthops(fi) { | ||
1317 | if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { | ||
1318 | if ((w -= nh->nh_power) <= 0) { | ||
1319 | nh->nh_power--; | ||
1320 | fi->fib_power--; | ||
1321 | res->nh_sel = nhsel; | ||
1322 | spin_unlock_bh(&fib_multipath_lock); | ||
1323 | return; | ||
1324 | } | ||
1325 | } | ||
1326 | } endfor_nexthops(fi); | ||
1327 | |||
1328 | /* Race condition: route has just become dead. */ | ||
1329 | res->nh_sel = 0; | ||
1330 | spin_unlock_bh(&fib_multipath_lock); | ||
1331 | } | ||
1332 | #endif | ||