aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/fib_semantics.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-23 14:47:02 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-23 14:47:02 -0400
commit: 5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0 (patch)
tree: 7851ef1c93aa1aba7ef327ca4b75fd35e6d10f29 /net/ipv4/fib_semantics.c
parent: 02f36038c568111ad4fc433f6fa760ff5e38fab4 (diff)
parent: ec37a48d1d16c30b655ac5280209edf52a6775d4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1699 commits)
  bnx2/bnx2x: Unsupported Ethtool operations should return -EINVAL.
  vlan: Calling vlan_hwaccel_do_receive() is always valid.
  tproxy: use the interface primary IP address as a default value for --on-ip
  tproxy: added IPv6 support to the socket match
  cxgb3: function namespace cleanup
  tproxy: added IPv6 support to the TPROXY target
  tproxy: added IPv6 socket lookup function to nf_tproxy_core
  be2net: Changes to use only priority codes allowed by f/w
  tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled
  tproxy: added tproxy sockopt interface in the IPV6 layer
  tproxy: added udp6_lib_lookup function
  tproxy: added const specifiers to udp lookup functions
  tproxy: split off ipv6 defragmentation to a separate module
  l2tp: small cleanup
  nf_nat: restrict ICMP translation for embedded header
  can: mcp251x: fix generation of error frames
  can: mcp251x: fix endless loop in interrupt handler if CANINTF_MERRF is set
  can-raw: add msg_flags to distinguish local traffic
  9p: client code cleanup
  rds: make local functions/variables static
  ...

Fix up conflicts in net/core/dev.c, drivers/net/pcmcia/smc91c92_cs.c and drivers/net/wireless/ath/ath9k/debug.c as per David
Diffstat (limited to 'net/ipv4/fib_semantics.c')
-rw-r--r-- net/ipv4/fib_semantics.c 297
1 files changed, 159 insertions, 138 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 20f09c5b31e8..3e0da3ef6116 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -60,21 +60,30 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60 60
61static DEFINE_SPINLOCK(fib_multipath_lock); 61static DEFINE_SPINLOCK(fib_multipath_lock);
62 62
63#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 63#define for_nexthops(fi) { \
64for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 64 int nhsel; const struct fib_nh *nh; \
65 65 for (nhsel = 0, nh = (fi)->fib_nh; \
66#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ 66 nhsel < (fi)->fib_nhs; \
67for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) 67 nh++, nhsel++)
68
69#define change_nexthops(fi) { \
70 int nhsel; struct fib_nh *nexthop_nh; \
71 for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
72 nhsel < (fi)->fib_nhs; \
73 nexthop_nh++, nhsel++)
68 74
69#else /* CONFIG_IP_ROUTE_MULTIPATH */ 75#else /* CONFIG_IP_ROUTE_MULTIPATH */
70 76
71/* Hope, that gcc will optimize it to get rid of dummy loop */ 77/* Hope, that gcc will optimize it to get rid of dummy loop */
72 78
73#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ 79#define for_nexthops(fi) { \
74for (nhsel=0; nhsel < 1; nhsel++) 80 int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \
81 for (nhsel = 0; nhsel < 1; nhsel++)
75 82
76#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 83#define change_nexthops(fi) { \
77for (nhsel=0; nhsel < 1; nhsel++) 84 int nhsel; \
85 struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
86 for (nhsel = 0; nhsel < 1; nhsel++)
78 87
79#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 88#endif /* CONFIG_IP_ROUTE_MULTIPATH */
80 89
@@ -86,63 +95,70 @@ static const struct
86 int error; 95 int error;
87 u8 scope; 96 u8 scope;
88} fib_props[RTN_MAX + 1] = { 97} fib_props[RTN_MAX + 1] = {
89 { 98 [RTN_UNSPEC] = {
90 .error = 0, 99 .error = 0,
91 .scope = RT_SCOPE_NOWHERE, 100 .scope = RT_SCOPE_NOWHERE,
92 }, /* RTN_UNSPEC */ 101 },
93 { 102 [RTN_UNICAST] = {
94 .error = 0, 103 .error = 0,
95 .scope = RT_SCOPE_UNIVERSE, 104 .scope = RT_SCOPE_UNIVERSE,
96 }, /* RTN_UNICAST */ 105 },
97 { 106 [RTN_LOCAL] = {
98 .error = 0, 107 .error = 0,
99 .scope = RT_SCOPE_HOST, 108 .scope = RT_SCOPE_HOST,
100 }, /* RTN_LOCAL */ 109 },
101 { 110 [RTN_BROADCAST] = {
102 .error = 0, 111 .error = 0,
103 .scope = RT_SCOPE_LINK, 112 .scope = RT_SCOPE_LINK,
104 }, /* RTN_BROADCAST */ 113 },
105 { 114 [RTN_ANYCAST] = {
106 .error = 0, 115 .error = 0,
107 .scope = RT_SCOPE_LINK, 116 .scope = RT_SCOPE_LINK,
108 }, /* RTN_ANYCAST */ 117 },
109 { 118 [RTN_MULTICAST] = {
110 .error = 0, 119 .error = 0,
111 .scope = RT_SCOPE_UNIVERSE, 120 .scope = RT_SCOPE_UNIVERSE,
112 }, /* RTN_MULTICAST */ 121 },
113 { 122 [RTN_BLACKHOLE] = {
114 .error = -EINVAL, 123 .error = -EINVAL,
115 .scope = RT_SCOPE_UNIVERSE, 124 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_BLACKHOLE */ 125 },
117 { 126 [RTN_UNREACHABLE] = {
118 .error = -EHOSTUNREACH, 127 .error = -EHOSTUNREACH,
119 .scope = RT_SCOPE_UNIVERSE, 128 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_UNREACHABLE */ 129 },
121 { 130 [RTN_PROHIBIT] = {
122 .error = -EACCES, 131 .error = -EACCES,
123 .scope = RT_SCOPE_UNIVERSE, 132 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_PROHIBIT */ 133 },
125 { 134 [RTN_THROW] = {
126 .error = -EAGAIN, 135 .error = -EAGAIN,
127 .scope = RT_SCOPE_UNIVERSE, 136 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_THROW */ 137 },
129 { 138 [RTN_NAT] = {
130 .error = -EINVAL, 139 .error = -EINVAL,
131 .scope = RT_SCOPE_NOWHERE, 140 .scope = RT_SCOPE_NOWHERE,
132 }, /* RTN_NAT */ 141 },
133 { 142 [RTN_XRESOLVE] = {
134 .error = -EINVAL, 143 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE, 144 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_XRESOLVE */ 145 },
137}; 146};
138 147
139 148
140/* Release a nexthop info record */ 149/* Release a nexthop info record */
141 150
151static void free_fib_info_rcu(struct rcu_head *head)
152{
153 struct fib_info *fi = container_of(head, struct fib_info, rcu);
154
155 kfree(fi);
156}
157
142void free_fib_info(struct fib_info *fi) 158void free_fib_info(struct fib_info *fi)
143{ 159{
144 if (fi->fib_dead == 0) { 160 if (fi->fib_dead == 0) {
145 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); 161 pr_warning("Freeing alive fib_info %p\n", fi);
146 return; 162 return;
147 } 163 }
148 change_nexthops(fi) { 164 change_nexthops(fi) {
@@ -152,7 +168,7 @@ void free_fib_info(struct fib_info *fi)
152 } endfor_nexthops(fi); 168 } endfor_nexthops(fi);
153 fib_info_cnt--; 169 fib_info_cnt--;
154 release_net(fi->fib_net); 170 release_net(fi->fib_net);
155 kfree(fi); 171 call_rcu(&fi->rcu, free_fib_info_rcu);
156} 172}
157 173
158void fib_release_info(struct fib_info *fi) 174void fib_release_info(struct fib_info *fi)
@@ -173,7 +189,7 @@ void fib_release_info(struct fib_info *fi)
173 spin_unlock_bh(&fib_info_lock); 189 spin_unlock_bh(&fib_info_lock);
174} 190}
175 191
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 192static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{ 193{
178 const struct fib_nh *onh = ofi->fib_nh; 194 const struct fib_nh *onh = ofi->fib_nh;
179 195
@@ -187,7 +203,7 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
187#ifdef CONFIG_NET_CLS_ROUTE 203#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid || 204 nh->nh_tclassid != onh->nh_tclassid ||
189#endif 205#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
191 return -1; 207 return -1;
192 onh++; 208 onh++;
193 } endfor_nexthops(fi); 209 } endfor_nexthops(fi);
@@ -238,7 +254,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
238 nfi->fib_priority == fi->fib_priority && 254 nfi->fib_priority == fi->fib_priority &&
239 memcmp(nfi->fib_metrics, fi->fib_metrics, 255 memcmp(nfi->fib_metrics, fi->fib_metrics,
240 sizeof(fi->fib_metrics)) == 0 && 256 sizeof(fi->fib_metrics)) == 0 &&
241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 257 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 258 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
243 return fi; 259 return fi;
244 } 260 }
@@ -247,9 +263,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
247} 263}
248 264
249/* Check, that the gateway is already configured. 265/* Check, that the gateway is already configured.
250 Used only by redirect accept routine. 266 * Used only by redirect accept routine.
251 */ 267 */
252
253int ip_fib_check_default(__be32 gw, struct net_device *dev) 268int ip_fib_check_default(__be32 gw, struct net_device *dev)
254{ 269{
255 struct hlist_head *head; 270 struct hlist_head *head;
@@ -264,7 +279,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
264 hlist_for_each_entry(nh, node, head, nh_hash) { 279 hlist_for_each_entry(nh, node, head, nh_hash) {
265 if (nh->nh_dev == dev && 280 if (nh->nh_dev == dev &&
266 nh->nh_gw == gw && 281 nh->nh_gw == gw &&
267 !(nh->nh_flags&RTNH_F_DEAD)) { 282 !(nh->nh_flags & RTNH_F_DEAD)) {
268 spin_unlock(&fib_info_lock); 283 spin_unlock(&fib_info_lock);
269 return 0; 284 return 0;
270 } 285 }
@@ -362,10 +377,10 @@ int fib_detect_death(struct fib_info *fi, int order,
362 } 377 }
363 if (state == NUD_REACHABLE) 378 if (state == NUD_REACHABLE)
364 return 0; 379 return 0;
365 if ((state&NUD_VALID) && order != dflt) 380 if ((state & NUD_VALID) && order != dflt)
366 return 0; 381 return 0;
367 if ((state&NUD_VALID) || 382 if ((state & NUD_VALID) ||
368 (*last_idx<0 && order > dflt)) { 383 (*last_idx < 0 && order > dflt)) {
369 *last_resort = fi; 384 *last_resort = fi;
370 *last_idx = order; 385 *last_idx = order;
371 } 386 }
@@ -476,75 +491,76 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
476 491
477 492
478/* 493/*
479 Picture 494 * Picture
480 ------- 495 * -------
481 496 *
482 Semantics of nexthop is very messy by historical reasons. 497 * Semantics of nexthop is very messy by historical reasons.
483 We have to take into account, that: 498 * We have to take into account, that:
484 a) gateway can be actually local interface address, 499 * a) gateway can be actually local interface address,
485 so that gatewayed route is direct. 500 * so that gatewayed route is direct.
486 b) gateway must be on-link address, possibly 501 * b) gateway must be on-link address, possibly
487 described not by an ifaddr, but also by a direct route. 502 * described not by an ifaddr, but also by a direct route.
488 c) If both gateway and interface are specified, they should not 503 * c) If both gateway and interface are specified, they should not
489 contradict. 504 * contradict.
490 d) If we use tunnel routes, gateway could be not on-link. 505 * d) If we use tunnel routes, gateway could be not on-link.
491 506 *
492 Attempt to reconcile all of these (alas, self-contradictory) conditions 507 * Attempt to reconcile all of these (alas, self-contradictory) conditions
493 results in pretty ugly and hairy code with obscure logic. 508 * results in pretty ugly and hairy code with obscure logic.
494 509 *
495 I chose to generalized it instead, so that the size 510 * I chose to generalized it instead, so that the size
496 of code does not increase practically, but it becomes 511 * of code does not increase practically, but it becomes
497 much more general. 512 * much more general.
498 Every prefix is assigned a "scope" value: "host" is local address, 513 * Every prefix is assigned a "scope" value: "host" is local address,
499 "link" is direct route, 514 * "link" is direct route,
500 [ ... "site" ... "interior" ... ] 515 * [ ... "site" ... "interior" ... ]
501 and "universe" is true gateway route with global meaning. 516 * and "universe" is true gateway route with global meaning.
502 517 *
503 Every prefix refers to a set of "nexthop"s (gw, oif), 518 * Every prefix refers to a set of "nexthop"s (gw, oif),
504 where gw must have narrower scope. This recursion stops 519 * where gw must have narrower scope. This recursion stops
505 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 520 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
506 which means that gw is forced to be on link. 521 * which means that gw is forced to be on link.
507 522 *
508 Code is still hairy, but now it is apparently logically 523 * Code is still hairy, but now it is apparently logically
509 consistent and very flexible. F.e. as by-product it allows 524 * consistent and very flexible. F.e. as by-product it allows
510 to co-exists in peace independent exterior and interior 525 * to co-exists in peace independent exterior and interior
511 routing processes. 526 * routing processes.
512 527 *
513 Normally it looks as following. 528 * Normally it looks as following.
514 529 *
515 {universe prefix} -> (gw, oif) [scope link] 530 * {universe prefix} -> (gw, oif) [scope link]
516 | 531 * |
517 |-> {link prefix} -> (gw, oif) [scope local] 532 * |-> {link prefix} -> (gw, oif) [scope local]
518 | 533 * |
519 |-> {local prefix} (terminal node) 534 * |-> {local prefix} (terminal node)
520 */ 535 */
521
522static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 536static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
523 struct fib_nh *nh) 537 struct fib_nh *nh)
524{ 538{
525 int err; 539 int err;
526 struct net *net; 540 struct net *net;
541 struct net_device *dev;
527 542
528 net = cfg->fc_nlinfo.nl_net; 543 net = cfg->fc_nlinfo.nl_net;
529 if (nh->nh_gw) { 544 if (nh->nh_gw) {
530 struct fib_result res; 545 struct fib_result res;
531 546
532 if (nh->nh_flags&RTNH_F_ONLINK) { 547 if (nh->nh_flags & RTNH_F_ONLINK) {
533 struct net_device *dev;
534 548
535 if (cfg->fc_scope >= RT_SCOPE_LINK) 549 if (cfg->fc_scope >= RT_SCOPE_LINK)
536 return -EINVAL; 550 return -EINVAL;
537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 551 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
538 return -EINVAL; 552 return -EINVAL;
539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) 553 dev = __dev_get_by_index(net, nh->nh_oif);
554 if (!dev)
540 return -ENODEV; 555 return -ENODEV;
541 if (!(dev->flags&IFF_UP)) 556 if (!(dev->flags & IFF_UP))
542 return -ENETDOWN; 557 return -ENETDOWN;
543 nh->nh_dev = dev; 558 nh->nh_dev = dev;
544 dev_hold(dev); 559 dev_hold(dev);
545 nh->nh_scope = RT_SCOPE_LINK; 560 nh->nh_scope = RT_SCOPE_LINK;
546 return 0; 561 return 0;
547 } 562 }
563 rcu_read_lock();
548 { 564 {
549 struct flowi fl = { 565 struct flowi fl = {
550 .nl_u = { 566 .nl_u = {
@@ -559,50 +575,53 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
559 /* It is not necessary, but requires a bit of thinking */ 575 /* It is not necessary, but requires a bit of thinking */
560 if (fl.fl4_scope < RT_SCOPE_LINK) 576 if (fl.fl4_scope < RT_SCOPE_LINK)
561 fl.fl4_scope = RT_SCOPE_LINK; 577 fl.fl4_scope = RT_SCOPE_LINK;
562 if ((err = fib_lookup(net, &fl, &res)) != 0) 578 err = fib_lookup(net, &fl, &res);
579 if (err) {
580 rcu_read_unlock();
563 return err; 581 return err;
582 }
564 } 583 }
565 err = -EINVAL; 584 err = -EINVAL;
566 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 585 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567 goto out; 586 goto out;
568 nh->nh_scope = res.scope; 587 nh->nh_scope = res.scope;
569 nh->nh_oif = FIB_RES_OIF(res); 588 nh->nh_oif = FIB_RES_OIF(res);
570 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 589 nh->nh_dev = dev = FIB_RES_DEV(res);
590 if (!dev)
571 goto out; 591 goto out;
572 dev_hold(nh->nh_dev); 592 dev_hold(dev);
573 err = -ENETDOWN; 593 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
574 if (!(nh->nh_dev->flags & IFF_UP))
575 goto out;
576 err = 0;
577out:
578 fib_res_put(&res);
579 return err;
580 } else { 594 } else {
581 struct in_device *in_dev; 595 struct in_device *in_dev;
582 596
583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 597 if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
584 return -EINVAL; 598 return -EINVAL;
585 599
600 rcu_read_lock();
601 err = -ENODEV;
586 in_dev = inetdev_by_index(net, nh->nh_oif); 602 in_dev = inetdev_by_index(net, nh->nh_oif);
587 if (in_dev == NULL) 603 if (in_dev == NULL)
588 return -ENODEV; 604 goto out;
589 if (!(in_dev->dev->flags&IFF_UP)) { 605 err = -ENETDOWN;
590 in_dev_put(in_dev); 606 if (!(in_dev->dev->flags & IFF_UP))
591 return -ENETDOWN; 607 goto out;
592 }
593 nh->nh_dev = in_dev->dev; 608 nh->nh_dev = in_dev->dev;
594 dev_hold(nh->nh_dev); 609 dev_hold(nh->nh_dev);
595 nh->nh_scope = RT_SCOPE_HOST; 610 nh->nh_scope = RT_SCOPE_HOST;
596 in_dev_put(in_dev); 611 err = 0;
597 } 612 }
598 return 0; 613out:
614 rcu_read_unlock();
615 return err;
599} 616}
600 617
601static inline unsigned int fib_laddr_hashfn(__be32 val) 618static inline unsigned int fib_laddr_hashfn(__be32 val)
602{ 619{
603 unsigned int mask = (fib_hash_size - 1); 620 unsigned int mask = (fib_hash_size - 1);
604 621
605 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 622 return ((__force u32)val ^
623 ((__force u32)val >> 7) ^
624 ((__force u32)val >> 14)) & mask;
606} 625}
607 626
608static struct hlist_head *fib_hash_alloc(int bytes) 627static struct hlist_head *fib_hash_alloc(int bytes)
@@ -611,7 +630,8 @@ static struct hlist_head *fib_hash_alloc(int bytes)
611 return kzalloc(bytes, GFP_KERNEL); 630 return kzalloc(bytes, GFP_KERNEL);
612 else 631 else
613 return (struct hlist_head *) 632 return (struct hlist_head *)
614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); 633 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
634 get_order(bytes));
615} 635}
616 636
617static void fib_hash_free(struct hlist_head *hash, int bytes) 637static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -806,7 +826,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
806 goto failure; 826 goto failure;
807 } else { 827 } else {
808 change_nexthops(fi) { 828 change_nexthops(fi) {
809 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) 829 err = fib_check_nh(cfg, fi, nexthop_nh);
830 if (err != 0)
810 goto failure; 831 goto failure;
811 } endfor_nexthops(fi) 832 } endfor_nexthops(fi)
812 } 833 }
@@ -819,7 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
819 } 840 }
820 841
821link_it: 842link_it:
822 if ((ofi = fib_find_info(fi)) != NULL) { 843 ofi = fib_find_info(fi);
844 if (ofi) {
823 fi->fib_dead = 1; 845 fi->fib_dead = 1;
824 free_fib_info(fi); 846 free_fib_info(fi);
825 ofi->fib_treeref++; 847 ofi->fib_treeref++;
@@ -864,7 +886,7 @@ failure:
864 886
865/* Note! fib_semantic_match intentionally uses RCU list functions. */ 887/* Note! fib_semantic_match intentionally uses RCU list functions. */
866int fib_semantic_match(struct list_head *head, const struct flowi *flp, 888int fib_semantic_match(struct list_head *head, const struct flowi *flp,
867 struct fib_result *res, int prefixlen) 889 struct fib_result *res, int prefixlen, int fib_flags)
868{ 890{
869 struct fib_alias *fa; 891 struct fib_alias *fa;
870 int nh_sel = 0; 892 int nh_sel = 0;
@@ -879,7 +901,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
879 if (fa->fa_scope < flp->fl4_scope) 901 if (fa->fa_scope < flp->fl4_scope)
880 continue; 902 continue;
881 903
882 fa->fa_state |= FA_S_ACCESSED; 904 fib_alias_accessed(fa);
883 905
884 err = fib_props[fa->fa_type].error; 906 err = fib_props[fa->fa_type].error;
885 if (err == 0) { 907 if (err == 0) {
@@ -895,7 +917,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
895 case RTN_ANYCAST: 917 case RTN_ANYCAST:
896 case RTN_MULTICAST: 918 case RTN_MULTICAST:
897 for_nexthops(fi) { 919 for_nexthops(fi) {
898 if (nh->nh_flags&RTNH_F_DEAD) 920 if (nh->nh_flags & RTNH_F_DEAD)
899 continue; 921 continue;
900 if (!flp->oif || flp->oif == nh->nh_oif) 922 if (!flp->oif || flp->oif == nh->nh_oif)
901 break; 923 break;
@@ -906,16 +928,15 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
906 goto out_fill_res; 928 goto out_fill_res;
907 } 929 }
908#else 930#else
909 if (nhsel < 1) { 931 if (nhsel < 1)
910 goto out_fill_res; 932 goto out_fill_res;
911 }
912#endif 933#endif
913 endfor_nexthops(fi); 934 endfor_nexthops(fi);
914 continue; 935 continue;
915 936
916 default: 937 default:
917 printk(KERN_WARNING "fib_semantic_match bad type %#x\n", 938 pr_warning("fib_semantic_match bad type %#x\n",
918 fa->fa_type); 939 fa->fa_type);
919 return -EINVAL; 940 return -EINVAL;
920 } 941 }
921 } 942 }
@@ -929,7 +950,8 @@ out_fill_res:
929 res->type = fa->fa_type; 950 res->type = fa->fa_type;
930 res->scope = fa->fa_scope; 951 res->scope = fa->fa_scope;
931 res->fi = fa->fa_info; 952 res->fi = fa->fa_info;
932 atomic_inc(&res->fi->fib_clntref); 953 if (!(fib_flags & FIB_LOOKUP_NOREF))
954 atomic_inc(&res->fi->fib_clntref);
933 return 0; 955 return 0;
934} 956}
935 957
@@ -1028,10 +1050,10 @@ nla_put_failure:
1028} 1050}
1029 1051
1030/* 1052/*
1031 Update FIB if: 1053 * Update FIB if:
1032 - local address disappeared -> we must delete all the entries 1054 * - local address disappeared -> we must delete all the entries
1033 referring to it. 1055 * referring to it.
1034 - device went down -> we must shutdown all nexthops going via it. 1056 * - device went down -> we must shutdown all nexthops going via it.
1035 */ 1057 */
1036int fib_sync_down_addr(struct net *net, __be32 local) 1058int fib_sync_down_addr(struct net *net, __be32 local)
1037{ 1059{
@@ -1078,7 +1100,7 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1078 prev_fi = fi; 1100 prev_fi = fi;
1079 dead = 0; 1101 dead = 0;
1080 change_nexthops(fi) { 1102 change_nexthops(fi) {
1081 if (nexthop_nh->nh_flags&RTNH_F_DEAD) 1103 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1082 dead++; 1104 dead++;
1083 else if (nexthop_nh->nh_dev == dev && 1105 else if (nexthop_nh->nh_dev == dev &&
1084 nexthop_nh->nh_scope != scope) { 1106 nexthop_nh->nh_scope != scope) {
@@ -1110,10 +1132,9 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1110#ifdef CONFIG_IP_ROUTE_MULTIPATH 1132#ifdef CONFIG_IP_ROUTE_MULTIPATH
1111 1133
1112/* 1134/*
1113 Dead device goes up. We wake up dead nexthops. 1135 * Dead device goes up. We wake up dead nexthops.
1114 It takes sense only on multipath routes. 1136 * It takes sense only on multipath routes.
1115 */ 1137 */
1116
1117int fib_sync_up(struct net_device *dev) 1138int fib_sync_up(struct net_device *dev)
1118{ 1139{
1119 struct fib_info *prev_fi; 1140 struct fib_info *prev_fi;
@@ -1123,7 +1144,7 @@ int fib_sync_up(struct net_device *dev)
1123 struct fib_nh *nh; 1144 struct fib_nh *nh;
1124 int ret; 1145 int ret;
1125 1146
1126 if (!(dev->flags&IFF_UP)) 1147 if (!(dev->flags & IFF_UP))
1127 return 0; 1148 return 0;
1128 1149
1129 prev_fi = NULL; 1150 prev_fi = NULL;
@@ -1142,12 +1163,12 @@ int fib_sync_up(struct net_device *dev)
1142 prev_fi = fi; 1163 prev_fi = fi;
1143 alive = 0; 1164 alive = 0;
1144 change_nexthops(fi) { 1165 change_nexthops(fi) {
1145 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1166 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1146 alive++; 1167 alive++;
1147 continue; 1168 continue;
1148 } 1169 }
1149 if (nexthop_nh->nh_dev == NULL || 1170 if (nexthop_nh->nh_dev == NULL ||
1150 !(nexthop_nh->nh_dev->flags&IFF_UP)) 1171 !(nexthop_nh->nh_dev->flags & IFF_UP))
1151 continue; 1172 continue;
1152 if (nexthop_nh->nh_dev != dev || 1173 if (nexthop_nh->nh_dev != dev ||
1153 !__in_dev_get_rtnl(dev)) 1174 !__in_dev_get_rtnl(dev))
@@ -1169,10 +1190,9 @@ int fib_sync_up(struct net_device *dev)
1169} 1190}
1170 1191
1171/* 1192/*
1172 The algorithm is suboptimal, but it provides really 1193 * The algorithm is suboptimal, but it provides really
1173 fair weighted route distribution. 1194 * fair weighted route distribution.
1174 */ 1195 */
1175
1176void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1196void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1177{ 1197{
1178 struct fib_info *fi = res->fi; 1198 struct fib_info *fi = res->fi;
@@ -1182,7 +1202,7 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1182 if (fi->fib_power <= 0) { 1202 if (fi->fib_power <= 0) {
1183 int power = 0; 1203 int power = 0;
1184 change_nexthops(fi) { 1204 change_nexthops(fi) {
1185 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1205 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1186 power += nexthop_nh->nh_weight; 1206 power += nexthop_nh->nh_weight;
1187 nexthop_nh->nh_power = nexthop_nh->nh_weight; 1207 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1188 } 1208 }
@@ -1198,15 +1218,16 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1198 1218
1199 1219
1200 /* w should be random number [0..fi->fib_power-1], 1220 /* w should be random number [0..fi->fib_power-1],
1201 it is pretty bad approximation. 1221 * it is pretty bad approximation.
1202 */ 1222 */
1203 1223
1204 w = jiffies % fi->fib_power; 1224 w = jiffies % fi->fib_power;
1205 1225
1206 change_nexthops(fi) { 1226 change_nexthops(fi) {
1207 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && 1227 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
1208 nexthop_nh->nh_power) { 1228 nexthop_nh->nh_power) {
1209 if ((w -= nexthop_nh->nh_power) <= 0) { 1229 w -= nexthop_nh->nh_power;
1230 if (w <= 0) {
1210 nexthop_nh->nh_power--; 1231 nexthop_nh->nh_power--;
1211 fi->fib_power--; 1232 fi->fib_power--;
1212 res->nh_sel = nhsel; 1233 res->nh_sel = nhsel;