about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/fib_frontend.c
diff options
context:
space:
mode:
author Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
commit c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/ipv4/fib_frontend.c
parent ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent 6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp (branch: wip-k-fmlp)
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'net/ipv4/fib_frontend.c')
-rw-r--r-- net/ipv4/fib_frontend.c 329
1 files changed, 186 insertions, 143 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7d02a9f999fa..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
44#include <net/arp.h> 44#include <net/arp.h>
45#include <net/ip_fib.h> 45#include <net/ip_fib.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/xfrm.h>
47 48
48#ifndef CONFIG_IP_MULTIPLE_TABLES 49#ifndef CONFIG_IP_MULTIPLE_TABLES
49 50
@@ -51,11 +52,11 @@ static int __net_init fib4_rules_init(struct net *net)
51{ 52{
52 struct fib_table *local_table, *main_table; 53 struct fib_table *local_table, *main_table;
53 54
54 local_table = fib_hash_table(RT_TABLE_LOCAL); 55 local_table = fib_trie_table(RT_TABLE_LOCAL);
55 if (local_table == NULL) 56 if (local_table == NULL)
56 return -ENOMEM; 57 return -ENOMEM;
57 58
58 main_table = fib_hash_table(RT_TABLE_MAIN); 59 main_table = fib_trie_table(RT_TABLE_MAIN);
59 if (main_table == NULL) 60 if (main_table == NULL)
60 goto fail; 61 goto fail;
61 62
@@ -82,7 +83,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
82 if (tb) 83 if (tb)
83 return tb; 84 return tb;
84 85
85 tb = fib_hash_table(id); 86 tb = fib_trie_table(id);
86 if (!tb) 87 if (!tb)
87 return NULL; 88 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1); 89 h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +115,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
114} 115}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 116#endif /* CONFIG_IP_MULTIPLE_TABLES */
116 117
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net) 118static void fib_flush(struct net *net)
133{ 119{
134 int flushed = 0; 120 int flushed = 0;
@@ -148,36 +134,6 @@ static void fib_flush(struct net *net)
148} 134}
149 135
150/* 136/*
151 * Find the first device with a given source address.
152 */
153
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
159 struct fib_table *local_table;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
178EXPORT_SYMBOL(ip_dev_find);
179
180/*
181 * Find address type as if only "dev" was present in the system. If 137 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration. 138 * on_dev is NULL then all interfaces are taken into consideration.
183 */ 139 */
@@ -185,7 +141,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev, 141 const struct net_device *dev,
186 __be32 addr) 142 __be32 addr)
187{ 143{
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 144 struct flowi4 fl4 = { .daddr = addr };
189 struct fib_result res; 145 struct fib_result res;
190 unsigned ret = RTN_BROADCAST; 146 unsigned ret = RTN_BROADCAST;
191 struct fib_table *local_table; 147 struct fib_table *local_table;
@@ -202,11 +158,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
202 local_table = fib_get_table(net, RT_TABLE_LOCAL); 158 local_table = fib_get_table(net, RT_TABLE_LOCAL);
203 if (local_table) { 159 if (local_table) {
204 ret = RTN_UNICAST; 160 ret = RTN_UNICAST;
205 if (!fib_table_lookup(local_table, &fl, &res)) { 161 rcu_read_lock();
162 if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
206 if (!dev || dev == res.fi->fib_dev) 163 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type; 164 ret = res.type;
208 fib_res_put(&res);
209 } 165 }
166 rcu_read_unlock();
210 } 167 }
211 return ret; 168 return ret;
212} 169}
@@ -220,59 +177,60 @@ EXPORT_SYMBOL(inet_addr_type);
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 177unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr) 178 __be32 addr)
222{ 179{
223 return __inet_dev_addr_type(net, dev, addr); 180 return __inet_dev_addr_type(net, dev, addr);
224} 181}
225EXPORT_SYMBOL(inet_dev_addr_type); 182EXPORT_SYMBOL(inet_dev_addr_type);
226 183
227/* Given (packet source, input interface) and optional (dst, oif, tos): 184/* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local 185 * - (main) check, that source is valid i.e. not broadcast or our local
229 address. 186 * address.
230 - figure out what "logical" interface this packet arrived 187 * - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address. 188 * and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface. 189 * - check, that packet arrived from expected physical interface.
190 * called with rcu_read_lock()
233 */ 191 */
234 192int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 193 int oif, struct net_device *dev, __be32 *spec_dst,
236 struct net_device *dev, __be32 *spec_dst, 194 u32 *itag)
237 u32 *itag, u32 mark)
238{ 195{
239 struct in_device *in_dev; 196 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u = 197 struct flowi4 fl4;
241 { .daddr = src,
242 .saddr = dst,
243 .tos = tos } },
244 .mark = mark,
245 .iif = oif };
246
247 struct fib_result res; 198 struct fib_result res;
248 int no_addr, rpf, accept_local; 199 int no_addr, rpf, accept_local;
249 bool dev_match; 200 bool dev_match;
250 int ret; 201 int ret;
251 struct net *net; 202 struct net *net;
252 203
204 fl4.flowi4_oif = 0;
205 fl4.flowi4_iif = oif;
206 fl4.daddr = src;
207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos;
209 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
210
253 no_addr = rpf = accept_local = 0; 211 no_addr = rpf = accept_local = 0;
254 rcu_read_lock();
255 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
256 if (in_dev) { 213 if (in_dev) {
257 no_addr = in_dev->ifa_list == NULL; 214 no_addr = in_dev->ifa_list == NULL;
258 rpf = IN_DEV_RPFILTER(in_dev); 215
216 /* Ignore rp_filter for packets protected by IPsec. */
217 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
218
259 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 219 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
260 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 220 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
261 fl.mark = 0;
262 } 221 }
263 rcu_read_unlock();
264 222
265 if (in_dev == NULL) 223 if (in_dev == NULL)
266 goto e_inval; 224 goto e_inval;
267 225
268 net = dev_net(dev); 226 net = dev_net(dev);
269 if (fib_lookup(net, &fl, &res)) 227 if (fib_lookup(net, &fl4, &res))
270 goto last_resort; 228 goto last_resort;
271 if (res.type != RTN_UNICAST) { 229 if (res.type != RTN_UNICAST) {
272 if (res.type != RTN_LOCAL || !accept_local) 230 if (res.type != RTN_LOCAL || !accept_local)
273 goto e_inval_res; 231 goto e_inval;
274 } 232 }
275 *spec_dst = FIB_RES_PREFSRC(res); 233 *spec_dst = FIB_RES_PREFSRC(net, res);
276 fib_combine_itag(itag, &res); 234 fib_combine_itag(itag, &res);
277 dev_match = false; 235 dev_match = false;
278 236
@@ -291,23 +249,20 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
291#endif 249#endif
292 if (dev_match) { 250 if (dev_match) {
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 251 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 fib_res_put(&res);
295 return ret; 252 return ret;
296 } 253 }
297 fib_res_put(&res);
298 if (no_addr) 254 if (no_addr)
299 goto last_resort; 255 goto last_resort;
300 if (rpf == 1) 256 if (rpf == 1)
301 goto e_rpf; 257 goto e_rpf;
302 fl.oif = dev->ifindex; 258 fl4.flowi4_oif = dev->ifindex;
303 259
304 ret = 0; 260 ret = 0;
305 if (fib_lookup(net, &fl, &res) == 0) { 261 if (fib_lookup(net, &fl4, &res) == 0) {
306 if (res.type == RTN_UNICAST) { 262 if (res.type == RTN_UNICAST) {
307 *spec_dst = FIB_RES_PREFSRC(res); 263 *spec_dst = FIB_RES_PREFSRC(net, res);
308 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 264 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
309 } 265 }
310 fib_res_put(&res);
311 } 266 }
312 return ret; 267 return ret;
313 268
@@ -318,8 +273,6 @@ last_resort:
318 *itag = 0; 273 *itag = 0;
319 return 0; 274 return 0;
320 275
321e_inval_res:
322 fib_res_put(&res);
323e_inval: 276e_inval:
324 return -EINVAL; 277 return -EINVAL;
325e_rpf: 278e_rpf:
@@ -472,9 +425,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
472} 425}
473 426
474/* 427/*
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 428 * Handle IP routing ioctl calls.
429 * These are used to manipulate the routing tables
476 */ 430 */
477
478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 431int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
479{ 432{
480 struct fib_config cfg; 433 struct fib_config cfg;
@@ -518,7 +471,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
518 return -EINVAL; 471 return -EINVAL;
519} 472}
520 473
521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 474const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
522 [RTA_DST] = { .type = NLA_U32 }, 475 [RTA_DST] = { .type = NLA_U32 },
523 [RTA_SRC] = { .type = NLA_U32 }, 476 [RTA_SRC] = { .type = NLA_U32 },
524 [RTA_IIF] = { .type = NLA_U32 }, 477 [RTA_IIF] = { .type = NLA_U32 },
@@ -532,7 +485,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
532}; 485};
533 486
534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 487static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535 struct nlmsghdr *nlh, struct fib_config *cfg) 488 struct nlmsghdr *nlh, struct fib_config *cfg)
536{ 489{
537 struct nlattr *attr; 490 struct nlattr *attr;
538 int err, remaining; 491 int err, remaining;
@@ -687,12 +640,11 @@ out:
687} 640}
688 641
689/* Prepare and feed intra-kernel routing request. 642/* Prepare and feed intra-kernel routing request.
690 Really, it should be netlink message, but :-( netlink 643 * Really, it should be netlink message, but :-( netlink
691 can be not configured, so that we feed it directly 644 * can be not configured, so that we feed it directly
692 to fib engine. It is legal, because all events occur 645 * to fib engine. It is legal, because all events occur
693 only when netlink is already locked. 646 * only when netlink is already locked.
694 */ 647 */
695
696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 648static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
697{ 649{
698 struct net *net = dev_net(ifa->ifa_dev->dev); 650 struct net *net = dev_net(ifa->ifa_dev->dev);
@@ -738,9 +690,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
738 struct in_ifaddr *prim = ifa; 690 struct in_ifaddr *prim = ifa;
739 __be32 mask = ifa->ifa_mask; 691 __be32 mask = ifa->ifa_mask;
740 __be32 addr = ifa->ifa_local; 692 __be32 addr = ifa->ifa_local;
741 __be32 prefix = ifa->ifa_address&mask; 693 __be32 prefix = ifa->ifa_address & mask;
742 694
743 if (ifa->ifa_flags&IFA_F_SECONDARY) { 695 if (ifa->ifa_flags & IFA_F_SECONDARY) {
744 prim = inet_ifa_byprefix(in_dev, prefix, mask); 696 prim = inet_ifa_byprefix(in_dev, prefix, mask);
745 if (prim == NULL) { 697 if (prim == NULL) {
746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 698 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
@@ -750,58 +702,118 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
750 702
751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 703 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
752 704
753 if (!(dev->flags&IFF_UP)) 705 if (!(dev->flags & IFF_UP))
754 return; 706 return;
755 707
756 /* Add broadcast address, if it is explicitly assigned. */ 708 /* Add broadcast address, if it is explicitly assigned. */
757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 709 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 710 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
759 711
760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 712 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
761 (prefix != addr || ifa->ifa_prefixlen < 32)) { 713 (prefix != addr || ifa->ifa_prefixlen < 32)) {
762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 714 fib_magic(RTM_NEWROUTE,
763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 715 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
716 prefix, ifa->ifa_prefixlen, prim);
764 717
765 /* Add network specific broadcasts, when it takes a sense */ 718 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa->ifa_prefixlen < 31) { 719 if (ifa->ifa_prefixlen < 31) {
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 720 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 721 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
722 32, prim);
769 } 723 }
770 } 724 }
771} 725}
772 726
773static void fib_del_ifaddr(struct in_ifaddr *ifa) 727/* Delete primary or secondary address.
728 * Optionally, on secondary address promotion consider the addresses
729 * from subnet iprim as deleted, even if they are in device list.
730 * In this case the secondary ifa can be in device list.
731 */
732void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
774{ 733{
775 struct in_device *in_dev = ifa->ifa_dev; 734 struct in_device *in_dev = ifa->ifa_dev;
776 struct net_device *dev = in_dev->dev; 735 struct net_device *dev = in_dev->dev;
777 struct in_ifaddr *ifa1; 736 struct in_ifaddr *ifa1;
778 struct in_ifaddr *prim = ifa; 737 struct in_ifaddr *prim = ifa, *prim1 = NULL;
779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 738 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
780 __be32 any = ifa->ifa_address&ifa->ifa_mask; 739 __be32 any = ifa->ifa_address & ifa->ifa_mask;
781#define LOCAL_OK 1 740#define LOCAL_OK 1
782#define BRD_OK 2 741#define BRD_OK 2
783#define BRD0_OK 4 742#define BRD0_OK 4
784#define BRD1_OK 8 743#define BRD1_OK 8
785 unsigned ok = 0; 744 unsigned ok = 0;
745 int subnet = 0; /* Primary network */
746 int gone = 1; /* Address is missing */
747 int same_prefsrc = 0; /* Another primary with same IP */
786 748
787 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 749 if (ifa->ifa_flags & IFA_F_SECONDARY) {
788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
790 else {
791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 750 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792 if (prim == NULL) { 751 if (prim == NULL) {
793 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 752 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
794 return; 753 return;
795 } 754 }
755 if (iprim && iprim != prim) {
756 printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
757 return;
758 }
759 } else if (!ipv4_is_zeronet(any) &&
760 (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
761 fib_magic(RTM_DELROUTE,
762 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
763 any, ifa->ifa_prefixlen, prim);
764 subnet = 1;
796 } 765 }
797 766
798 /* Deletion is more complicated than add. 767 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-) 768 * We should take care of not to delete too much :-)
800 769 *
801 Scan address list to be sure that addresses are really gone. 770 * Scan address list to be sure that addresses are really gone.
802 */ 771 */
803 772
804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 773 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
774 if (ifa1 == ifa) {
775 /* promotion, keep the IP */
776 gone = 0;
777 continue;
778 }
779 /* Ignore IFAs from our subnet */
780 if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
781 inet_ifa_match(ifa1->ifa_address, iprim))
782 continue;
783
784 /* Ignore ifa1 if it uses different primary IP (prefsrc) */
785 if (ifa1->ifa_flags & IFA_F_SECONDARY) {
786 /* Another address from our subnet? */
787 if (ifa1->ifa_mask == prim->ifa_mask &&
788 inet_ifa_match(ifa1->ifa_address, prim))
789 prim1 = prim;
790 else {
791 /* We reached the secondaries, so
792 * same_prefsrc should be determined.
793 */
794 if (!same_prefsrc)
795 continue;
796 /* Search new prim1 if ifa1 is not
797 * using the current prim1
798 */
799 if (!prim1 ||
800 ifa1->ifa_mask != prim1->ifa_mask ||
801 !inet_ifa_match(ifa1->ifa_address, prim1))
802 prim1 = inet_ifa_byprefix(in_dev,
803 ifa1->ifa_address,
804 ifa1->ifa_mask);
805 if (!prim1)
806 continue;
807 if (prim1->ifa_local != prim->ifa_local)
808 continue;
809 }
810 } else {
811 if (prim->ifa_local != ifa1->ifa_local)
812 continue;
813 prim1 = ifa1;
814 if (prim != prim1)
815 same_prefsrc = 1;
816 }
805 if (ifa->ifa_local == ifa1->ifa_local) 817 if (ifa->ifa_local == ifa1->ifa_local)
806 ok |= LOCAL_OK; 818 ok |= LOCAL_OK;
807 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 819 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -810,25 +822,43 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
810 ok |= BRD1_OK; 822 ok |= BRD1_OK;
811 if (any == ifa1->ifa_broadcast) 823 if (any == ifa1->ifa_broadcast)
812 ok |= BRD0_OK; 824 ok |= BRD0_OK;
825 /* primary has network specific broadcasts */
826 if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
827 __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
828 __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
829
830 if (!ipv4_is_zeronet(any1)) {
831 if (ifa->ifa_broadcast == brd1 ||
832 ifa->ifa_broadcast == any1)
833 ok |= BRD_OK;
834 if (brd == brd1 || brd == any1)
835 ok |= BRD1_OK;
836 if (any == brd1 || any == any1)
837 ok |= BRD0_OK;
838 }
839 }
813 } 840 }
814 841
815 if (!(ok&BRD_OK)) 842 if (!(ok & BRD_OK))
816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 843 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817 if (!(ok&BRD1_OK)) 844 if (subnet && ifa->ifa_prefixlen < 31) {
818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 845 if (!(ok & BRD1_OK))
819 if (!(ok&BRD0_OK)) 846 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 847 if (!(ok & BRD0_OK))
821 if (!(ok&LOCAL_OK)) { 848 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
849 }
850 if (!(ok & LOCAL_OK)) {
822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 851 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
823 852
824 /* Check, that this local address finally disappeared. */ 853 /* Check, that this local address finally disappeared. */
825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 854 if (gone &&
855 inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
826 /* And the last, but not the least thing. 856 /* And the last, but not the least thing.
827 We must flush stray FIB entries. 857 * We must flush stray FIB entries.
828 858 *
829 First of all, we scan fib_info list searching 859 * First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush. 860 * for stray nexthop entries, then ignite fib_flush.
831 */ 861 */
832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 862 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833 fib_flush(dev_net(dev)); 863 fib_flush(dev_net(dev));
834 } 864 }
@@ -839,14 +869,16 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
839#undef BRD1_OK 869#undef BRD1_OK
840} 870}
841 871
842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 872static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
843{ 873{
844 874
845 struct fib_result res; 875 struct fib_result res;
846 struct flowi fl = { .mark = frn->fl_mark, 876 struct flowi4 fl4 = {
847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 877 .flowi4_mark = frn->fl_mark,
848 .tos = frn->fl_tos, 878 .daddr = frn->fl_addr,
849 .scope = frn->fl_scope } } }; 879 .flowi4_tos = frn->fl_tos,
880 .flowi4_scope = frn->fl_scope,
881 };
850 882
851#ifdef CONFIG_IP_MULTIPLE_TABLES 883#ifdef CONFIG_IP_MULTIPLE_TABLES
852 res.r = NULL; 884 res.r = NULL;
@@ -857,15 +889,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
857 local_bh_disable(); 889 local_bh_disable();
858 890
859 frn->tb_id = tb->tb_id; 891 frn->tb_id = tb->tb_id;
860 frn->err = fib_table_lookup(tb, &fl, &res); 892 rcu_read_lock();
893 frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
861 894
862 if (!frn->err) { 895 if (!frn->err) {
863 frn->prefixlen = res.prefixlen; 896 frn->prefixlen = res.prefixlen;
864 frn->nh_sel = res.nh_sel; 897 frn->nh_sel = res.nh_sel;
865 frn->type = res.type; 898 frn->type = res.type;
866 frn->scope = res.scope; 899 frn->scope = res.scope;
867 fib_res_put(&res);
868 } 900 }
901 rcu_read_unlock();
869 local_bh_enable(); 902 local_bh_enable();
870 } 903 }
871} 904}
@@ -894,8 +927,8 @@ static void nl_fib_input(struct sk_buff *skb)
894 927
895 nl_fib_lookup(frn, tb); 928 nl_fib_lookup(frn, tb);
896 929
897 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 930 pid = NETLINK_CB(skb).pid; /* pid of sending process */
898 NETLINK_CB(skb).pid = 0; /* from kernel */ 931 NETLINK_CB(skb).pid = 0; /* from kernel */
899 NETLINK_CB(skb).dst_group = 0; /* unicast */ 932 NETLINK_CB(skb).dst_group = 0; /* unicast */
900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 933 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
901} 934}
@@ -929,6 +962,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
929{ 962{
930 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 963 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
931 struct net_device *dev = ifa->ifa_dev->dev; 964 struct net_device *dev = ifa->ifa_dev->dev;
965 struct net *net = dev_net(dev);
932 966
933 switch (event) { 967 switch (event) {
934 case NETDEV_UP: 968 case NETDEV_UP:
@@ -936,13 +970,15 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
936#ifdef CONFIG_IP_ROUTE_MULTIPATH 970#ifdef CONFIG_IP_ROUTE_MULTIPATH
937 fib_sync_up(dev); 971 fib_sync_up(dev);
938#endif 972#endif
973 atomic_inc(&net->ipv4.dev_addr_genid);
939 rt_cache_flush(dev_net(dev), -1); 974 rt_cache_flush(dev_net(dev), -1);
940 break; 975 break;
941 case NETDEV_DOWN: 976 case NETDEV_DOWN:
942 fib_del_ifaddr(ifa); 977 fib_del_ifaddr(ifa, NULL);
978 atomic_inc(&net->ipv4.dev_addr_genid);
943 if (ifa->ifa_dev->ifa_list == NULL) { 979 if (ifa->ifa_dev->ifa_list == NULL) {
944 /* Last address was deleted from this interface. 980 /* Last address was deleted from this interface.
945 Disable IP. 981 * Disable IP.
946 */ 982 */
947 fib_disable_ip(dev, 1, 0); 983 fib_disable_ip(dev, 1, 0);
948 } else { 984 } else {
@@ -957,6 +993,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
957{ 993{
958 struct net_device *dev = ptr; 994 struct net_device *dev = ptr;
959 struct in_device *in_dev = __in_dev_get_rtnl(dev); 995 struct in_device *in_dev = __in_dev_get_rtnl(dev);
996 struct net *net = dev_net(dev);
960 997
961 if (event == NETDEV_UNREGISTER) { 998 if (event == NETDEV_UNREGISTER) {
962 fib_disable_ip(dev, 2, -1); 999 fib_disable_ip(dev, 2, -1);
@@ -974,6 +1011,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
974#ifdef CONFIG_IP_ROUTE_MULTIPATH 1011#ifdef CONFIG_IP_ROUTE_MULTIPATH
975 fib_sync_up(dev); 1012 fib_sync_up(dev);
976#endif 1013#endif
1014 atomic_inc(&net->ipv4.dev_addr_genid);
977 rt_cache_flush(dev_net(dev), -1); 1015 rt_cache_flush(dev_net(dev), -1);
978 break; 1016 break;
979 case NETDEV_DOWN: 1017 case NETDEV_DOWN:
@@ -984,7 +1022,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
984 rt_cache_flush(dev_net(dev), 0); 1022 rt_cache_flush(dev_net(dev), 0);
985 break; 1023 break;
986 case NETDEV_UNREGISTER_BATCH: 1024 case NETDEV_UNREGISTER_BATCH:
987 rt_cache_flush_batch(); 1025 /* The batch unregister is only called on the first
1026 * device in the list of devices being unregistered.
1027 * Therefore we should not pass dev_net(dev) in here.
1028 */
1029 rt_cache_flush_batch(NULL);
988 break; 1030 break;
989 } 1031 }
990 return NOTIFY_DONE; 1032 return NOTIFY_DONE;
@@ -1001,16 +1043,15 @@ static struct notifier_block fib_netdev_notifier = {
1001static int __net_init ip_fib_net_init(struct net *net) 1043static int __net_init ip_fib_net_init(struct net *net)
1002{ 1044{
1003 int err; 1045 int err;
1004 unsigned int i; 1046 size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1047
1048 /* Avoid false sharing : Use at least a full cache line */
1049 size = max_t(size_t, size, L1_CACHE_BYTES);
1005 1050
1006 net->ipv4.fib_table_hash = kzalloc( 1051 net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1008 if (net->ipv4.fib_table_hash == NULL) 1052 if (net->ipv4.fib_table_hash == NULL)
1009 return -ENOMEM; 1053 return -ENOMEM;
1010 1054
1011 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1013
1014 err = fib4_rules_init(net); 1055 err = fib4_rules_init(net);
1015 if (err < 0) 1056 if (err < 0)
1016 goto fail; 1057 goto fail;
@@ -1029,6 +1070,7 @@ static void ip_fib_net_exit(struct net *net)
1029 fib4_rules_exit(net); 1070 fib4_rules_exit(net);
1030#endif 1071#endif
1031 1072
1073 rtnl_lock();
1032 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1074 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1033 struct fib_table *tb; 1075 struct fib_table *tb;
1034 struct hlist_head *head; 1076 struct hlist_head *head;
@@ -1038,9 +1080,10 @@ static void ip_fib_net_exit(struct net *net)
1038 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1080 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1039 hlist_del(node); 1081 hlist_del(node);
1040 fib_table_flush(tb); 1082 fib_table_flush(tb);
1041 kfree(tb); 1083 fib_free_table(tb);
1042 } 1084 }
1043 } 1085 }
1086 rtnl_unlock();
1044 kfree(net->ipv4.fib_table_hash); 1087 kfree(net->ipv4.fib_table_hash);
1045} 1088}
1046 1089
@@ -1089,5 +1132,5 @@ void __init ip_fib_init(void)
1089 register_netdevice_notifier(&fib_netdev_notifier); 1132 register_netdevice_notifier(&fib_netdev_notifier);
1090 register_inetaddr_notifier(&fib_inetaddr_notifier); 1133 register_inetaddr_notifier(&fib_inetaddr_notifier);
1091 1134
1092 fib_hash_init(); 1135 fib_trie_init();
1093} 1136}