diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/ipv4/fib_frontend.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'net/ipv4/fib_frontend.c')
-rw-r--r-- | net/ipv4/fib_frontend.c | 329 |
1 files changed, 186 insertions, 143 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7d02a9f999fa..22524716fe70 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <net/arp.h> | 44 | #include <net/arp.h> |
45 | #include <net/ip_fib.h> | 45 | #include <net/ip_fib.h> |
46 | #include <net/rtnetlink.h> | 46 | #include <net/rtnetlink.h> |
47 | #include <net/xfrm.h> | ||
47 | 48 | ||
48 | #ifndef CONFIG_IP_MULTIPLE_TABLES | 49 | #ifndef CONFIG_IP_MULTIPLE_TABLES |
49 | 50 | ||
@@ -51,11 +52,11 @@ static int __net_init fib4_rules_init(struct net *net) | |||
51 | { | 52 | { |
52 | struct fib_table *local_table, *main_table; | 53 | struct fib_table *local_table, *main_table; |
53 | 54 | ||
54 | local_table = fib_hash_table(RT_TABLE_LOCAL); | 55 | local_table = fib_trie_table(RT_TABLE_LOCAL); |
55 | if (local_table == NULL) | 56 | if (local_table == NULL) |
56 | return -ENOMEM; | 57 | return -ENOMEM; |
57 | 58 | ||
58 | main_table = fib_hash_table(RT_TABLE_MAIN); | 59 | main_table = fib_trie_table(RT_TABLE_MAIN); |
59 | if (main_table == NULL) | 60 | if (main_table == NULL) |
60 | goto fail; | 61 | goto fail; |
61 | 62 | ||
@@ -82,7 +83,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) | |||
82 | if (tb) | 83 | if (tb) |
83 | return tb; | 84 | return tb; |
84 | 85 | ||
85 | tb = fib_hash_table(id); | 86 | tb = fib_trie_table(id); |
86 | if (!tb) | 87 | if (!tb) |
87 | return NULL; | 88 | return NULL; |
88 | h = id & (FIB_TABLE_HASHSZ - 1); | 89 | h = id & (FIB_TABLE_HASHSZ - 1); |
@@ -114,21 +115,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id) | |||
114 | } | 115 | } |
115 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | 116 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ |
116 | 117 | ||
117 | void fib_select_default(struct net *net, | ||
118 | const struct flowi *flp, struct fib_result *res) | ||
119 | { | ||
120 | struct fib_table *tb; | ||
121 | int table = RT_TABLE_MAIN; | ||
122 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
123 | if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) | ||
124 | return; | ||
125 | table = res->r->table; | ||
126 | #endif | ||
127 | tb = fib_get_table(net, table); | ||
128 | if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | ||
129 | fib_table_select_default(tb, flp, res); | ||
130 | } | ||
131 | |||
132 | static void fib_flush(struct net *net) | 118 | static void fib_flush(struct net *net) |
133 | { | 119 | { |
134 | int flushed = 0; | 120 | int flushed = 0; |
@@ -148,36 +134,6 @@ static void fib_flush(struct net *net) | |||
148 | } | 134 | } |
149 | 135 | ||
150 | /* | 136 | /* |
151 | * Find the first device with a given source address. | ||
152 | */ | ||
153 | |||
154 | struct net_device * ip_dev_find(struct net *net, __be32 addr) | ||
155 | { | ||
156 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; | ||
157 | struct fib_result res; | ||
158 | struct net_device *dev = NULL; | ||
159 | struct fib_table *local_table; | ||
160 | |||
161 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
162 | res.r = NULL; | ||
163 | #endif | ||
164 | |||
165 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | ||
166 | if (!local_table || fib_table_lookup(local_table, &fl, &res)) | ||
167 | return NULL; | ||
168 | if (res.type != RTN_LOCAL) | ||
169 | goto out; | ||
170 | dev = FIB_RES_DEV(res); | ||
171 | |||
172 | if (dev) | ||
173 | dev_hold(dev); | ||
174 | out: | ||
175 | fib_res_put(&res); | ||
176 | return dev; | ||
177 | } | ||
178 | EXPORT_SYMBOL(ip_dev_find); | ||
179 | |||
180 | /* | ||
181 | * Find address type as if only "dev" was present in the system. If | 137 | * Find address type as if only "dev" was present in the system. If |
182 | * on_dev is NULL then all interfaces are taken into consideration. | 138 | * on_dev is NULL then all interfaces are taken into consideration. |
183 | */ | 139 | */ |
@@ -185,7 +141,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
185 | const struct net_device *dev, | 141 | const struct net_device *dev, |
186 | __be32 addr) | 142 | __be32 addr) |
187 | { | 143 | { |
188 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; | 144 | struct flowi4 fl4 = { .daddr = addr }; |
189 | struct fib_result res; | 145 | struct fib_result res; |
190 | unsigned ret = RTN_BROADCAST; | 146 | unsigned ret = RTN_BROADCAST; |
191 | struct fib_table *local_table; | 147 | struct fib_table *local_table; |
@@ -202,11 +158,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
202 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | 158 | local_table = fib_get_table(net, RT_TABLE_LOCAL); |
203 | if (local_table) { | 159 | if (local_table) { |
204 | ret = RTN_UNICAST; | 160 | ret = RTN_UNICAST; |
205 | if (!fib_table_lookup(local_table, &fl, &res)) { | 161 | rcu_read_lock(); |
162 | if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { | ||
206 | if (!dev || dev == res.fi->fib_dev) | 163 | if (!dev || dev == res.fi->fib_dev) |
207 | ret = res.type; | 164 | ret = res.type; |
208 | fib_res_put(&res); | ||
209 | } | 165 | } |
166 | rcu_read_unlock(); | ||
210 | } | 167 | } |
211 | return ret; | 168 | return ret; |
212 | } | 169 | } |
@@ -220,59 +177,60 @@ EXPORT_SYMBOL(inet_addr_type); | |||
220 | unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, | 177 | unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, |
221 | __be32 addr) | 178 | __be32 addr) |
222 | { | 179 | { |
223 | return __inet_dev_addr_type(net, dev, addr); | 180 | return __inet_dev_addr_type(net, dev, addr); |
224 | } | 181 | } |
225 | EXPORT_SYMBOL(inet_dev_addr_type); | 182 | EXPORT_SYMBOL(inet_dev_addr_type); |
226 | 183 | ||
227 | /* Given (packet source, input interface) and optional (dst, oif, tos): | 184 | /* Given (packet source, input interface) and optional (dst, oif, tos): |
228 | - (main) check, that source is valid i.e. not broadcast or our local | 185 | * - (main) check, that source is valid i.e. not broadcast or our local |
229 | address. | 186 | * address. |
230 | - figure out what "logical" interface this packet arrived | 187 | * - figure out what "logical" interface this packet arrived |
231 | and calculate "specific destination" address. | 188 | * and calculate "specific destination" address. |
232 | - check, that packet arrived from expected physical interface. | 189 | * - check, that packet arrived from expected physical interface. |
190 | * called with rcu_read_lock() | ||
233 | */ | 191 | */ |
234 | 192 | int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, | |
235 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | 193 | int oif, struct net_device *dev, __be32 *spec_dst, |
236 | struct net_device *dev, __be32 *spec_dst, | 194 | u32 *itag) |
237 | u32 *itag, u32 mark) | ||
238 | { | 195 | { |
239 | struct in_device *in_dev; | 196 | struct in_device *in_dev; |
240 | struct flowi fl = { .nl_u = { .ip4_u = | 197 | struct flowi4 fl4; |
241 | { .daddr = src, | ||
242 | .saddr = dst, | ||
243 | .tos = tos } }, | ||
244 | .mark = mark, | ||
245 | .iif = oif }; | ||
246 | |||
247 | struct fib_result res; | 198 | struct fib_result res; |
248 | int no_addr, rpf, accept_local; | 199 | int no_addr, rpf, accept_local; |
249 | bool dev_match; | 200 | bool dev_match; |
250 | int ret; | 201 | int ret; |
251 | struct net *net; | 202 | struct net *net; |
252 | 203 | ||
204 | fl4.flowi4_oif = 0; | ||
205 | fl4.flowi4_iif = oif; | ||
206 | fl4.daddr = src; | ||
207 | fl4.saddr = dst; | ||
208 | fl4.flowi4_tos = tos; | ||
209 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
210 | |||
253 | no_addr = rpf = accept_local = 0; | 211 | no_addr = rpf = accept_local = 0; |
254 | rcu_read_lock(); | ||
255 | in_dev = __in_dev_get_rcu(dev); | 212 | in_dev = __in_dev_get_rcu(dev); |
256 | if (in_dev) { | 213 | if (in_dev) { |
257 | no_addr = in_dev->ifa_list == NULL; | 214 | no_addr = in_dev->ifa_list == NULL; |
258 | rpf = IN_DEV_RPFILTER(in_dev); | 215 | |
216 | /* Ignore rp_filter for packets protected by IPsec. */ | ||
217 | rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); | ||
218 | |||
259 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); | 219 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); |
260 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 220 | fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; |
261 | fl.mark = 0; | ||
262 | } | 221 | } |
263 | rcu_read_unlock(); | ||
264 | 222 | ||
265 | if (in_dev == NULL) | 223 | if (in_dev == NULL) |
266 | goto e_inval; | 224 | goto e_inval; |
267 | 225 | ||
268 | net = dev_net(dev); | 226 | net = dev_net(dev); |
269 | if (fib_lookup(net, &fl, &res)) | 227 | if (fib_lookup(net, &fl4, &res)) |
270 | goto last_resort; | 228 | goto last_resort; |
271 | if (res.type != RTN_UNICAST) { | 229 | if (res.type != RTN_UNICAST) { |
272 | if (res.type != RTN_LOCAL || !accept_local) | 230 | if (res.type != RTN_LOCAL || !accept_local) |
273 | goto e_inval_res; | 231 | goto e_inval; |
274 | } | 232 | } |
275 | *spec_dst = FIB_RES_PREFSRC(res); | 233 | *spec_dst = FIB_RES_PREFSRC(net, res); |
276 | fib_combine_itag(itag, &res); | 234 | fib_combine_itag(itag, &res); |
277 | dev_match = false; | 235 | dev_match = false; |
278 | 236 | ||
@@ -291,23 +249,20 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
291 | #endif | 249 | #endif |
292 | if (dev_match) { | 250 | if (dev_match) { |
293 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 251 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
294 | fib_res_put(&res); | ||
295 | return ret; | 252 | return ret; |
296 | } | 253 | } |
297 | fib_res_put(&res); | ||
298 | if (no_addr) | 254 | if (no_addr) |
299 | goto last_resort; | 255 | goto last_resort; |
300 | if (rpf == 1) | 256 | if (rpf == 1) |
301 | goto e_rpf; | 257 | goto e_rpf; |
302 | fl.oif = dev->ifindex; | 258 | fl4.flowi4_oif = dev->ifindex; |
303 | 259 | ||
304 | ret = 0; | 260 | ret = 0; |
305 | if (fib_lookup(net, &fl, &res) == 0) { | 261 | if (fib_lookup(net, &fl4, &res) == 0) { |
306 | if (res.type == RTN_UNICAST) { | 262 | if (res.type == RTN_UNICAST) { |
307 | *spec_dst = FIB_RES_PREFSRC(res); | 263 | *spec_dst = FIB_RES_PREFSRC(net, res); |
308 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 264 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
309 | } | 265 | } |
310 | fib_res_put(&res); | ||
311 | } | 266 | } |
312 | return ret; | 267 | return ret; |
313 | 268 | ||
@@ -318,8 +273,6 @@ last_resort: | |||
318 | *itag = 0; | 273 | *itag = 0; |
319 | return 0; | 274 | return 0; |
320 | 275 | ||
321 | e_inval_res: | ||
322 | fib_res_put(&res); | ||
323 | e_inval: | 276 | e_inval: |
324 | return -EINVAL; | 277 | return -EINVAL; |
325 | e_rpf: | 278 | e_rpf: |
@@ -472,9 +425,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, | |||
472 | } | 425 | } |
473 | 426 | ||
474 | /* | 427 | /* |
475 | * Handle IP routing ioctl calls. These are used to manipulate the routing tables | 428 | * Handle IP routing ioctl calls. |
429 | * These are used to manipulate the routing tables | ||
476 | */ | 430 | */ |
477 | |||
478 | int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) | 431 | int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) |
479 | { | 432 | { |
480 | struct fib_config cfg; | 433 | struct fib_config cfg; |
@@ -518,7 +471,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
518 | return -EINVAL; | 471 | return -EINVAL; |
519 | } | 472 | } |
520 | 473 | ||
521 | const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { | 474 | const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { |
522 | [RTA_DST] = { .type = NLA_U32 }, | 475 | [RTA_DST] = { .type = NLA_U32 }, |
523 | [RTA_SRC] = { .type = NLA_U32 }, | 476 | [RTA_SRC] = { .type = NLA_U32 }, |
524 | [RTA_IIF] = { .type = NLA_U32 }, | 477 | [RTA_IIF] = { .type = NLA_U32 }, |
@@ -532,7 +485,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { | |||
532 | }; | 485 | }; |
533 | 486 | ||
534 | static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, | 487 | static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, |
535 | struct nlmsghdr *nlh, struct fib_config *cfg) | 488 | struct nlmsghdr *nlh, struct fib_config *cfg) |
536 | { | 489 | { |
537 | struct nlattr *attr; | 490 | struct nlattr *attr; |
538 | int err, remaining; | 491 | int err, remaining; |
@@ -687,12 +640,11 @@ out: | |||
687 | } | 640 | } |
688 | 641 | ||
689 | /* Prepare and feed intra-kernel routing request. | 642 | /* Prepare and feed intra-kernel routing request. |
690 | Really, it should be netlink message, but :-( netlink | 643 | * Really, it should be netlink message, but :-( netlink |
691 | can be not configured, so that we feed it directly | 644 | * can be not configured, so that we feed it directly |
692 | to fib engine. It is legal, because all events occur | 645 | * to fib engine. It is legal, because all events occur |
693 | only when netlink is already locked. | 646 | * only when netlink is already locked. |
694 | */ | 647 | */ |
695 | |||
696 | static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) | 648 | static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) |
697 | { | 649 | { |
698 | struct net *net = dev_net(ifa->ifa_dev->dev); | 650 | struct net *net = dev_net(ifa->ifa_dev->dev); |
@@ -738,9 +690,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) | |||
738 | struct in_ifaddr *prim = ifa; | 690 | struct in_ifaddr *prim = ifa; |
739 | __be32 mask = ifa->ifa_mask; | 691 | __be32 mask = ifa->ifa_mask; |
740 | __be32 addr = ifa->ifa_local; | 692 | __be32 addr = ifa->ifa_local; |
741 | __be32 prefix = ifa->ifa_address&mask; | 693 | __be32 prefix = ifa->ifa_address & mask; |
742 | 694 | ||
743 | if (ifa->ifa_flags&IFA_F_SECONDARY) { | 695 | if (ifa->ifa_flags & IFA_F_SECONDARY) { |
744 | prim = inet_ifa_byprefix(in_dev, prefix, mask); | 696 | prim = inet_ifa_byprefix(in_dev, prefix, mask); |
745 | if (prim == NULL) { | 697 | if (prim == NULL) { |
746 | printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); | 698 | printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); |
@@ -750,58 +702,118 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) | |||
750 | 702 | ||
751 | fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); | 703 | fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); |
752 | 704 | ||
753 | if (!(dev->flags&IFF_UP)) | 705 | if (!(dev->flags & IFF_UP)) |
754 | return; | 706 | return; |
755 | 707 | ||
756 | /* Add broadcast address, if it is explicitly assigned. */ | 708 | /* Add broadcast address, if it is explicitly assigned. */ |
757 | if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) | 709 | if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) |
758 | fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); | 710 | fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); |
759 | 711 | ||
760 | if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && | 712 | if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && |
761 | (prefix != addr || ifa->ifa_prefixlen < 32)) { | 713 | (prefix != addr || ifa->ifa_prefixlen < 32)) { |
762 | fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : | 714 | fib_magic(RTM_NEWROUTE, |
763 | RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); | 715 | dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, |
716 | prefix, ifa->ifa_prefixlen, prim); | ||
764 | 717 | ||
765 | /* Add network specific broadcasts, when it takes a sense */ | 718 | /* Add network specific broadcasts, when it takes a sense */ |
766 | if (ifa->ifa_prefixlen < 31) { | 719 | if (ifa->ifa_prefixlen < 31) { |
767 | fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); | 720 | fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); |
768 | fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); | 721 | fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, |
722 | 32, prim); | ||
769 | } | 723 | } |
770 | } | 724 | } |
771 | } | 725 | } |
772 | 726 | ||
773 | static void fib_del_ifaddr(struct in_ifaddr *ifa) | 727 | /* Delete primary or secondary address. |
728 | * Optionally, on secondary address promotion consider the addresses | ||
729 | * from subnet iprim as deleted, even if they are in device list. | ||
730 | * In this case the secondary ifa can be in device list. | ||
731 | */ | ||
732 | void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) | ||
774 | { | 733 | { |
775 | struct in_device *in_dev = ifa->ifa_dev; | 734 | struct in_device *in_dev = ifa->ifa_dev; |
776 | struct net_device *dev = in_dev->dev; | 735 | struct net_device *dev = in_dev->dev; |
777 | struct in_ifaddr *ifa1; | 736 | struct in_ifaddr *ifa1; |
778 | struct in_ifaddr *prim = ifa; | 737 | struct in_ifaddr *prim = ifa, *prim1 = NULL; |
779 | __be32 brd = ifa->ifa_address|~ifa->ifa_mask; | 738 | __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; |
780 | __be32 any = ifa->ifa_address&ifa->ifa_mask; | 739 | __be32 any = ifa->ifa_address & ifa->ifa_mask; |
781 | #define LOCAL_OK 1 | 740 | #define LOCAL_OK 1 |
782 | #define BRD_OK 2 | 741 | #define BRD_OK 2 |
783 | #define BRD0_OK 4 | 742 | #define BRD0_OK 4 |
784 | #define BRD1_OK 8 | 743 | #define BRD1_OK 8 |
785 | unsigned ok = 0; | 744 | unsigned ok = 0; |
745 | int subnet = 0; /* Primary network */ | ||
746 | int gone = 1; /* Address is missing */ | ||
747 | int same_prefsrc = 0; /* Another primary with same IP */ | ||
786 | 748 | ||
787 | if (!(ifa->ifa_flags&IFA_F_SECONDARY)) | 749 | if (ifa->ifa_flags & IFA_F_SECONDARY) { |
788 | fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : | ||
789 | RTN_UNICAST, any, ifa->ifa_prefixlen, prim); | ||
790 | else { | ||
791 | prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); | 750 | prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); |
792 | if (prim == NULL) { | 751 | if (prim == NULL) { |
793 | printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); | 752 | printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); |
794 | return; | 753 | return; |
795 | } | 754 | } |
755 | if (iprim && iprim != prim) { | ||
756 | printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n"); | ||
757 | return; | ||
758 | } | ||
759 | } else if (!ipv4_is_zeronet(any) && | ||
760 | (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { | ||
761 | fib_magic(RTM_DELROUTE, | ||
762 | dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, | ||
763 | any, ifa->ifa_prefixlen, prim); | ||
764 | subnet = 1; | ||
796 | } | 765 | } |
797 | 766 | ||
798 | /* Deletion is more complicated than add. | 767 | /* Deletion is more complicated than add. |
799 | We should take care of not to delete too much :-) | 768 | * We should take care of not to delete too much :-) |
800 | 769 | * | |
801 | Scan address list to be sure that addresses are really gone. | 770 | * Scan address list to be sure that addresses are really gone. |
802 | */ | 771 | */ |
803 | 772 | ||
804 | for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { | 773 | for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { |
774 | if (ifa1 == ifa) { | ||
775 | /* promotion, keep the IP */ | ||
776 | gone = 0; | ||
777 | continue; | ||
778 | } | ||
779 | /* Ignore IFAs from our subnet */ | ||
780 | if (iprim && ifa1->ifa_mask == iprim->ifa_mask && | ||
781 | inet_ifa_match(ifa1->ifa_address, iprim)) | ||
782 | continue; | ||
783 | |||
784 | /* Ignore ifa1 if it uses different primary IP (prefsrc) */ | ||
785 | if (ifa1->ifa_flags & IFA_F_SECONDARY) { | ||
786 | /* Another address from our subnet? */ | ||
787 | if (ifa1->ifa_mask == prim->ifa_mask && | ||
788 | inet_ifa_match(ifa1->ifa_address, prim)) | ||
789 | prim1 = prim; | ||
790 | else { | ||
791 | /* We reached the secondaries, so | ||
792 | * same_prefsrc should be determined. | ||
793 | */ | ||
794 | if (!same_prefsrc) | ||
795 | continue; | ||
796 | /* Search new prim1 if ifa1 is not | ||
797 | * using the current prim1 | ||
798 | */ | ||
799 | if (!prim1 || | ||
800 | ifa1->ifa_mask != prim1->ifa_mask || | ||
801 | !inet_ifa_match(ifa1->ifa_address, prim1)) | ||
802 | prim1 = inet_ifa_byprefix(in_dev, | ||
803 | ifa1->ifa_address, | ||
804 | ifa1->ifa_mask); | ||
805 | if (!prim1) | ||
806 | continue; | ||
807 | if (prim1->ifa_local != prim->ifa_local) | ||
808 | continue; | ||
809 | } | ||
810 | } else { | ||
811 | if (prim->ifa_local != ifa1->ifa_local) | ||
812 | continue; | ||
813 | prim1 = ifa1; | ||
814 | if (prim != prim1) | ||
815 | same_prefsrc = 1; | ||
816 | } | ||
805 | if (ifa->ifa_local == ifa1->ifa_local) | 817 | if (ifa->ifa_local == ifa1->ifa_local) |
806 | ok |= LOCAL_OK; | 818 | ok |= LOCAL_OK; |
807 | if (ifa->ifa_broadcast == ifa1->ifa_broadcast) | 819 | if (ifa->ifa_broadcast == ifa1->ifa_broadcast) |
@@ -810,25 +822,43 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) | |||
810 | ok |= BRD1_OK; | 822 | ok |= BRD1_OK; |
811 | if (any == ifa1->ifa_broadcast) | 823 | if (any == ifa1->ifa_broadcast) |
812 | ok |= BRD0_OK; | 824 | ok |= BRD0_OK; |
825 | /* primary has network specific broadcasts */ | ||
826 | if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) { | ||
827 | __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask; | ||
828 | __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask; | ||
829 | |||
830 | if (!ipv4_is_zeronet(any1)) { | ||
831 | if (ifa->ifa_broadcast == brd1 || | ||
832 | ifa->ifa_broadcast == any1) | ||
833 | ok |= BRD_OK; | ||
834 | if (brd == brd1 || brd == any1) | ||
835 | ok |= BRD1_OK; | ||
836 | if (any == brd1 || any == any1) | ||
837 | ok |= BRD0_OK; | ||
838 | } | ||
839 | } | ||
813 | } | 840 | } |
814 | 841 | ||
815 | if (!(ok&BRD_OK)) | 842 | if (!(ok & BRD_OK)) |
816 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); | 843 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); |
817 | if (!(ok&BRD1_OK)) | 844 | if (subnet && ifa->ifa_prefixlen < 31) { |
818 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); | 845 | if (!(ok & BRD1_OK)) |
819 | if (!(ok&BRD0_OK)) | 846 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); |
820 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); | 847 | if (!(ok & BRD0_OK)) |
821 | if (!(ok&LOCAL_OK)) { | 848 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); |
849 | } | ||
850 | if (!(ok & LOCAL_OK)) { | ||
822 | fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); | 851 | fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); |
823 | 852 | ||
824 | /* Check, that this local address finally disappeared. */ | 853 | /* Check, that this local address finally disappeared. */ |
825 | if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { | 854 | if (gone && |
855 | inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { | ||
826 | /* And the last, but not the least thing. | 856 | /* And the last, but not the least thing. |
827 | We must flush stray FIB entries. | 857 | * We must flush stray FIB entries. |
828 | 858 | * | |
829 | First of all, we scan fib_info list searching | 859 | * First of all, we scan fib_info list searching |
830 | for stray nexthop entries, then ignite fib_flush. | 860 | * for stray nexthop entries, then ignite fib_flush. |
831 | */ | 861 | */ |
832 | if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) | 862 | if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) |
833 | fib_flush(dev_net(dev)); | 863 | fib_flush(dev_net(dev)); |
834 | } | 864 | } |
@@ -839,14 +869,16 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) | |||
839 | #undef BRD1_OK | 869 | #undef BRD1_OK |
840 | } | 870 | } |
841 | 871 | ||
842 | static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) | 872 | static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) |
843 | { | 873 | { |
844 | 874 | ||
845 | struct fib_result res; | 875 | struct fib_result res; |
846 | struct flowi fl = { .mark = frn->fl_mark, | 876 | struct flowi4 fl4 = { |
847 | .nl_u = { .ip4_u = { .daddr = frn->fl_addr, | 877 | .flowi4_mark = frn->fl_mark, |
848 | .tos = frn->fl_tos, | 878 | .daddr = frn->fl_addr, |
849 | .scope = frn->fl_scope } } }; | 879 | .flowi4_tos = frn->fl_tos, |
880 | .flowi4_scope = frn->fl_scope, | ||
881 | }; | ||
850 | 882 | ||
851 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 883 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
852 | res.r = NULL; | 884 | res.r = NULL; |
@@ -857,15 +889,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) | |||
857 | local_bh_disable(); | 889 | local_bh_disable(); |
858 | 890 | ||
859 | frn->tb_id = tb->tb_id; | 891 | frn->tb_id = tb->tb_id; |
860 | frn->err = fib_table_lookup(tb, &fl, &res); | 892 | rcu_read_lock(); |
893 | frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); | ||
861 | 894 | ||
862 | if (!frn->err) { | 895 | if (!frn->err) { |
863 | frn->prefixlen = res.prefixlen; | 896 | frn->prefixlen = res.prefixlen; |
864 | frn->nh_sel = res.nh_sel; | 897 | frn->nh_sel = res.nh_sel; |
865 | frn->type = res.type; | 898 | frn->type = res.type; |
866 | frn->scope = res.scope; | 899 | frn->scope = res.scope; |
867 | fib_res_put(&res); | ||
868 | } | 900 | } |
901 | rcu_read_unlock(); | ||
869 | local_bh_enable(); | 902 | local_bh_enable(); |
870 | } | 903 | } |
871 | } | 904 | } |
@@ -894,8 +927,8 @@ static void nl_fib_input(struct sk_buff *skb) | |||
894 | 927 | ||
895 | nl_fib_lookup(frn, tb); | 928 | nl_fib_lookup(frn, tb); |
896 | 929 | ||
897 | pid = NETLINK_CB(skb).pid; /* pid of sending process */ | 930 | pid = NETLINK_CB(skb).pid; /* pid of sending process */ |
898 | NETLINK_CB(skb).pid = 0; /* from kernel */ | 931 | NETLINK_CB(skb).pid = 0; /* from kernel */ |
899 | NETLINK_CB(skb).dst_group = 0; /* unicast */ | 932 | NETLINK_CB(skb).dst_group = 0; /* unicast */ |
900 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); | 933 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); |
901 | } | 934 | } |
@@ -929,6 +962,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
929 | { | 962 | { |
930 | struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; | 963 | struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; |
931 | struct net_device *dev = ifa->ifa_dev->dev; | 964 | struct net_device *dev = ifa->ifa_dev->dev; |
965 | struct net *net = dev_net(dev); | ||
932 | 966 | ||
933 | switch (event) { | 967 | switch (event) { |
934 | case NETDEV_UP: | 968 | case NETDEV_UP: |
@@ -936,13 +970,15 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
936 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 970 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
937 | fib_sync_up(dev); | 971 | fib_sync_up(dev); |
938 | #endif | 972 | #endif |
973 | atomic_inc(&net->ipv4.dev_addr_genid); | ||
939 | rt_cache_flush(dev_net(dev), -1); | 974 | rt_cache_flush(dev_net(dev), -1); |
940 | break; | 975 | break; |
941 | case NETDEV_DOWN: | 976 | case NETDEV_DOWN: |
942 | fib_del_ifaddr(ifa); | 977 | fib_del_ifaddr(ifa, NULL); |
978 | atomic_inc(&net->ipv4.dev_addr_genid); | ||
943 | if (ifa->ifa_dev->ifa_list == NULL) { | 979 | if (ifa->ifa_dev->ifa_list == NULL) { |
944 | /* Last address was deleted from this interface. | 980 | /* Last address was deleted from this interface. |
945 | Disable IP. | 981 | * Disable IP. |
946 | */ | 982 | */ |
947 | fib_disable_ip(dev, 1, 0); | 983 | fib_disable_ip(dev, 1, 0); |
948 | } else { | 984 | } else { |
@@ -957,6 +993,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
957 | { | 993 | { |
958 | struct net_device *dev = ptr; | 994 | struct net_device *dev = ptr; |
959 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 995 | struct in_device *in_dev = __in_dev_get_rtnl(dev); |
996 | struct net *net = dev_net(dev); | ||
960 | 997 | ||
961 | if (event == NETDEV_UNREGISTER) { | 998 | if (event == NETDEV_UNREGISTER) { |
962 | fib_disable_ip(dev, 2, -1); | 999 | fib_disable_ip(dev, 2, -1); |
@@ -974,6 +1011,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
974 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1011 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
975 | fib_sync_up(dev); | 1012 | fib_sync_up(dev); |
976 | #endif | 1013 | #endif |
1014 | atomic_inc(&net->ipv4.dev_addr_genid); | ||
977 | rt_cache_flush(dev_net(dev), -1); | 1015 | rt_cache_flush(dev_net(dev), -1); |
978 | break; | 1016 | break; |
979 | case NETDEV_DOWN: | 1017 | case NETDEV_DOWN: |
@@ -984,7 +1022,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
984 | rt_cache_flush(dev_net(dev), 0); | 1022 | rt_cache_flush(dev_net(dev), 0); |
985 | break; | 1023 | break; |
986 | case NETDEV_UNREGISTER_BATCH: | 1024 | case NETDEV_UNREGISTER_BATCH: |
987 | rt_cache_flush_batch(); | 1025 | /* The batch unregister is only called on the first |
1026 | * device in the list of devices being unregistered. | ||
1027 | * Therefore we should not pass dev_net(dev) in here. | ||
1028 | */ | ||
1029 | rt_cache_flush_batch(NULL); | ||
988 | break; | 1030 | break; |
989 | } | 1031 | } |
990 | return NOTIFY_DONE; | 1032 | return NOTIFY_DONE; |
@@ -1001,16 +1043,15 @@ static struct notifier_block fib_netdev_notifier = { | |||
1001 | static int __net_init ip_fib_net_init(struct net *net) | 1043 | static int __net_init ip_fib_net_init(struct net *net) |
1002 | { | 1044 | { |
1003 | int err; | 1045 | int err; |
1004 | unsigned int i; | 1046 | size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; |
1047 | |||
1048 | /* Avoid false sharing : Use at least a full cache line */ | ||
1049 | size = max_t(size_t, size, L1_CACHE_BYTES); | ||
1005 | 1050 | ||
1006 | net->ipv4.fib_table_hash = kzalloc( | 1051 | net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); |
1007 | sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); | ||
1008 | if (net->ipv4.fib_table_hash == NULL) | 1052 | if (net->ipv4.fib_table_hash == NULL) |
1009 | return -ENOMEM; | 1053 | return -ENOMEM; |
1010 | 1054 | ||
1011 | for (i = 0; i < FIB_TABLE_HASHSZ; i++) | ||
1012 | INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); | ||
1013 | |||
1014 | err = fib4_rules_init(net); | 1055 | err = fib4_rules_init(net); |
1015 | if (err < 0) | 1056 | if (err < 0) |
1016 | goto fail; | 1057 | goto fail; |
@@ -1029,6 +1070,7 @@ static void ip_fib_net_exit(struct net *net) | |||
1029 | fib4_rules_exit(net); | 1070 | fib4_rules_exit(net); |
1030 | #endif | 1071 | #endif |
1031 | 1072 | ||
1073 | rtnl_lock(); | ||
1032 | for (i = 0; i < FIB_TABLE_HASHSZ; i++) { | 1074 | for (i = 0; i < FIB_TABLE_HASHSZ; i++) { |
1033 | struct fib_table *tb; | 1075 | struct fib_table *tb; |
1034 | struct hlist_head *head; | 1076 | struct hlist_head *head; |
@@ -1038,9 +1080,10 @@ static void ip_fib_net_exit(struct net *net) | |||
1038 | hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { | 1080 | hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { |
1039 | hlist_del(node); | 1081 | hlist_del(node); |
1040 | fib_table_flush(tb); | 1082 | fib_table_flush(tb); |
1041 | kfree(tb); | 1083 | fib_free_table(tb); |
1042 | } | 1084 | } |
1043 | } | 1085 | } |
1086 | rtnl_unlock(); | ||
1044 | kfree(net->ipv4.fib_table_hash); | 1087 | kfree(net->ipv4.fib_table_hash); |
1045 | } | 1088 | } |
1046 | 1089 | ||
@@ -1089,5 +1132,5 @@ void __init ip_fib_init(void) | |||
1089 | register_netdevice_notifier(&fib_netdev_notifier); | 1132 | register_netdevice_notifier(&fib_netdev_notifier); |
1090 | register_inetaddr_notifier(&fib_inetaddr_notifier); | 1133 | register_inetaddr_notifier(&fib_inetaddr_notifier); |
1091 | 1134 | ||
1092 | fib_hash_init(); | 1135 | fib_trie_init(); |
1093 | } | 1136 | } |