aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/fib_frontend.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/fib_frontend.c')
-rw-r--r--net/ipv4/fib_frontend.c215
1 files changed, 122 insertions, 93 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd43a878..451088330bbb 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net)
51{ 51{
52 struct fib_table *local_table, *main_table; 52 struct fib_table *local_table, *main_table;
53 53
54 local_table = fib_hash_table(RT_TABLE_LOCAL); 54 local_table = fib_trie_table(RT_TABLE_LOCAL);
55 if (local_table == NULL) 55 if (local_table == NULL)
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 main_table = fib_hash_table(RT_TABLE_MAIN); 58 main_table = fib_trie_table(RT_TABLE_MAIN);
59 if (main_table == NULL) 59 if (main_table == NULL)
60 goto fail; 60 goto fail;
61 61
@@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
82 if (tb) 82 if (tb)
83 return tb; 83 return tb;
84 84
85 tb = fib_hash_table(id); 85 tb = fib_trie_table(id);
86 if (!tb) 86 if (!tb)
87 return NULL; 87 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1); 88 h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
114} 114}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116 116
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net) 117static void fib_flush(struct net *net)
133{ 118{
134 int flushed = 0; 119 int flushed = 0;
@@ -147,46 +132,6 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 132 rt_cache_flush(net, -1);
148} 133}
149 134
150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU, or RTNL
157 */
158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
159{
160 struct flowi fl = {
161 .fl4_dst = addr,
162 };
163 struct fib_result res = { 0 };
164 struct net_device *dev = NULL;
165 struct fib_table *local_table;
166
167#ifdef CONFIG_IP_MULTIPLE_TABLES
168 res.r = NULL;
169#endif
170
171 rcu_read_lock();
172 local_table = fib_get_table(net, RT_TABLE_LOCAL);
173 if (!local_table ||
174 fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
175 rcu_read_unlock();
176 return NULL;
177 }
178 if (res.type != RTN_LOCAL)
179 goto out;
180 dev = FIB_RES_DEV(res);
181
182 if (dev && devref)
183 dev_hold(dev);
184out:
185 rcu_read_unlock();
186 return dev;
187}
188EXPORT_SYMBOL(__ip_dev_find);
189
190/* 135/*
191 * Find address type as if only "dev" was present in the system. If 136 * Find address type as if only "dev" was present in the system. If
192 * on_dev is NULL then all interfaces are taken into consideration. 137 * on_dev is NULL then all interfaces are taken into consideration.
@@ -195,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
195 const struct net_device *dev, 140 const struct net_device *dev,
196 __be32 addr) 141 __be32 addr)
197{ 142{
198 struct flowi fl = { .fl4_dst = addr }; 143 struct flowi4 fl4 = { .daddr = addr };
199 struct fib_result res; 144 struct fib_result res;
200 unsigned ret = RTN_BROADCAST; 145 unsigned ret = RTN_BROADCAST;
201 struct fib_table *local_table; 146 struct fib_table *local_table;
@@ -213,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
213 if (local_table) { 158 if (local_table) {
214 ret = RTN_UNICAST; 159 ret = RTN_UNICAST;
215 rcu_read_lock(); 160 rcu_read_lock();
216 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { 161 if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
217 if (!dev || dev == res.fi->fib_dev) 162 if (!dev || dev == res.fi->fib_dev)
218 ret = res.type; 163 ret = res.type;
219 } 164 }
@@ -248,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
248 u32 *itag, u32 mark) 193 u32 *itag, u32 mark)
249{ 194{
250 struct in_device *in_dev; 195 struct in_device *in_dev;
251 struct flowi fl = { 196 struct flowi4 fl4;
252 .fl4_dst = src,
253 .fl4_src = dst,
254 .fl4_tos = tos,
255 .mark = mark,
256 .iif = oif
257 };
258 struct fib_result res; 197 struct fib_result res;
259 int no_addr, rpf, accept_local; 198 int no_addr, rpf, accept_local;
260 bool dev_match; 199 bool dev_match;
261 int ret; 200 int ret;
262 struct net *net; 201 struct net *net;
263 202
203 fl4.flowi4_oif = 0;
204 fl4.flowi4_iif = oif;
205 fl4.flowi4_mark = mark;
206 fl4.daddr = src;
207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos;
209 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
210
264 no_addr = rpf = accept_local = 0; 211 no_addr = rpf = accept_local = 0;
265 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
266 if (in_dev) { 213 if (in_dev) {
@@ -268,20 +215,20 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
268 rpf = IN_DEV_RPFILTER(in_dev); 215 rpf = IN_DEV_RPFILTER(in_dev);
269 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 216 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
270 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 217 if (mark && !IN_DEV_SRC_VMARK(in_dev))
271 fl.mark = 0; 218 fl4.flowi4_mark = 0;
272 } 219 }
273 220
274 if (in_dev == NULL) 221 if (in_dev == NULL)
275 goto e_inval; 222 goto e_inval;
276 223
277 net = dev_net(dev); 224 net = dev_net(dev);
278 if (fib_lookup(net, &fl, &res)) 225 if (fib_lookup(net, &fl4, &res))
279 goto last_resort; 226 goto last_resort;
280 if (res.type != RTN_UNICAST) { 227 if (res.type != RTN_UNICAST) {
281 if (res.type != RTN_LOCAL || !accept_local) 228 if (res.type != RTN_LOCAL || !accept_local)
282 goto e_inval; 229 goto e_inval;
283 } 230 }
284 *spec_dst = FIB_RES_PREFSRC(res); 231 *spec_dst = FIB_RES_PREFSRC(net, res);
285 fib_combine_itag(itag, &res); 232 fib_combine_itag(itag, &res);
286 dev_match = false; 233 dev_match = false;
287 234
@@ -306,12 +253,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
306 goto last_resort; 253 goto last_resort;
307 if (rpf == 1) 254 if (rpf == 1)
308 goto e_rpf; 255 goto e_rpf;
309 fl.oif = dev->ifindex; 256 fl4.flowi4_oif = dev->ifindex;
310 257
311 ret = 0; 258 ret = 0;
312 if (fib_lookup(net, &fl, &res) == 0) { 259 if (fib_lookup(net, &fl4, &res) == 0) {
313 if (res.type == RTN_UNICAST) { 260 if (res.type == RTN_UNICAST) {
314 *spec_dst = FIB_RES_PREFSRC(res); 261 *spec_dst = FIB_RES_PREFSRC(net, res);
315 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 262 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
316 } 263 }
317 } 264 }
@@ -775,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
775 } 722 }
776} 723}
777 724
778static void fib_del_ifaddr(struct in_ifaddr *ifa) 725/* Delete primary or secondary address.
726 * Optionally, on secondary address promotion consider the addresses
727 * from subnet iprim as deleted, even if they are in device list.
728 * In this case the secondary ifa can be in device list.
729 */
730void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
779{ 731{
780 struct in_device *in_dev = ifa->ifa_dev; 732 struct in_device *in_dev = ifa->ifa_dev;
781 struct net_device *dev = in_dev->dev; 733 struct net_device *dev = in_dev->dev;
782 struct in_ifaddr *ifa1; 734 struct in_ifaddr *ifa1;
783 struct in_ifaddr *prim = ifa; 735 struct in_ifaddr *prim = ifa, *prim1 = NULL;
784 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; 736 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
785 __be32 any = ifa->ifa_address & ifa->ifa_mask; 737 __be32 any = ifa->ifa_address & ifa->ifa_mask;
786#define LOCAL_OK 1 738#define LOCAL_OK 1
@@ -788,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
788#define BRD0_OK 4 740#define BRD0_OK 4
789#define BRD1_OK 8 741#define BRD1_OK 8
790 unsigned ok = 0; 742 unsigned ok = 0;
743 int subnet = 0; /* Primary network */
744 int gone = 1; /* Address is missing */
745 int same_prefsrc = 0; /* Another primary with same IP */
791 746
792 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) 747 if (ifa->ifa_flags & IFA_F_SECONDARY) {
793 fib_magic(RTM_DELROUTE,
794 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
795 any, ifa->ifa_prefixlen, prim);
796 else {
797 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 748 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
798 if (prim == NULL) { 749 if (prim == NULL) {
799 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 750 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
800 return; 751 return;
801 } 752 }
753 if (iprim && iprim != prim) {
754 printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
755 return;
756 }
757 } else if (!ipv4_is_zeronet(any) &&
758 (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
759 fib_magic(RTM_DELROUTE,
760 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
761 any, ifa->ifa_prefixlen, prim);
762 subnet = 1;
802 } 763 }
803 764
804 /* Deletion is more complicated than add. 765 /* Deletion is more complicated than add.
@@ -808,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
808 */ 769 */
809 770
810 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 771 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
772 if (ifa1 == ifa) {
773 /* promotion, keep the IP */
774 gone = 0;
775 continue;
776 }
777 /* Ignore IFAs from our subnet */
778 if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
779 inet_ifa_match(ifa1->ifa_address, iprim))
780 continue;
781
782 /* Ignore ifa1 if it uses different primary IP (prefsrc) */
783 if (ifa1->ifa_flags & IFA_F_SECONDARY) {
784 /* Another address from our subnet? */
785 if (ifa1->ifa_mask == prim->ifa_mask &&
786 inet_ifa_match(ifa1->ifa_address, prim))
787 prim1 = prim;
788 else {
789 /* We reached the secondaries, so
790 * same_prefsrc should be determined.
791 */
792 if (!same_prefsrc)
793 continue;
794 /* Search new prim1 if ifa1 is not
795 * using the current prim1
796 */
797 if (!prim1 ||
798 ifa1->ifa_mask != prim1->ifa_mask ||
799 !inet_ifa_match(ifa1->ifa_address, prim1))
800 prim1 = inet_ifa_byprefix(in_dev,
801 ifa1->ifa_address,
802 ifa1->ifa_mask);
803 if (!prim1)
804 continue;
805 if (prim1->ifa_local != prim->ifa_local)
806 continue;
807 }
808 } else {
809 if (prim->ifa_local != ifa1->ifa_local)
810 continue;
811 prim1 = ifa1;
812 if (prim != prim1)
813 same_prefsrc = 1;
814 }
811 if (ifa->ifa_local == ifa1->ifa_local) 815 if (ifa->ifa_local == ifa1->ifa_local)
812 ok |= LOCAL_OK; 816 ok |= LOCAL_OK;
813 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 817 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -816,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
816 ok |= BRD1_OK; 820 ok |= BRD1_OK;
817 if (any == ifa1->ifa_broadcast) 821 if (any == ifa1->ifa_broadcast)
818 ok |= BRD0_OK; 822 ok |= BRD0_OK;
823 /* primary has network specific broadcasts */
824 if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
825 __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
826 __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
827
828 if (!ipv4_is_zeronet(any1)) {
829 if (ifa->ifa_broadcast == brd1 ||
830 ifa->ifa_broadcast == any1)
831 ok |= BRD_OK;
832 if (brd == brd1 || brd == any1)
833 ok |= BRD1_OK;
834 if (any == brd1 || any == any1)
835 ok |= BRD0_OK;
836 }
837 }
819 } 838 }
820 839
821 if (!(ok & BRD_OK)) 840 if (!(ok & BRD_OK))
822 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 841 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
823 if (!(ok & BRD1_OK)) 842 if (subnet && ifa->ifa_prefixlen < 31) {
824 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 843 if (!(ok & BRD1_OK))
825 if (!(ok & BRD0_OK)) 844 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
826 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 845 if (!(ok & BRD0_OK))
846 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
847 }
827 if (!(ok & LOCAL_OK)) { 848 if (!(ok & LOCAL_OK)) {
828 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 849 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
829 850
830 /* Check, that this local address finally disappeared. */ 851 /* Check, that this local address finally disappeared. */
831 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 852 if (gone &&
853 inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
832 /* And the last, but not the least thing. 854 /* And the last, but not the least thing.
833 * We must flush stray FIB entries. 855 * We must flush stray FIB entries.
834 * 856 *
@@ -849,11 +871,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
849{ 871{
850 872
851 struct fib_result res; 873 struct fib_result res;
852 struct flowi fl = { 874 struct flowi4 fl4 = {
853 .mark = frn->fl_mark, 875 .flowi4_mark = frn->fl_mark,
854 .fl4_dst = frn->fl_addr, 876 .daddr = frn->fl_addr,
855 .fl4_tos = frn->fl_tos, 877 .flowi4_tos = frn->fl_tos,
856 .fl4_scope = frn->fl_scope, 878 .flowi4_scope = frn->fl_scope,
857 }; 879 };
858 880
859#ifdef CONFIG_IP_MULTIPLE_TABLES 881#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -866,7 +888,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
866 888
867 frn->tb_id = tb->tb_id; 889 frn->tb_id = tb->tb_id;
868 rcu_read_lock(); 890 rcu_read_lock();
869 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); 891 frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
870 892
871 if (!frn->err) { 893 if (!frn->err) {
872 frn->prefixlen = res.prefixlen; 894 frn->prefixlen = res.prefixlen;
@@ -938,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
938{ 960{
939 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 961 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
940 struct net_device *dev = ifa->ifa_dev->dev; 962 struct net_device *dev = ifa->ifa_dev->dev;
963 struct net *net = dev_net(dev);
941 964
942 switch (event) { 965 switch (event) {
943 case NETDEV_UP: 966 case NETDEV_UP:
@@ -945,10 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
945#ifdef CONFIG_IP_ROUTE_MULTIPATH 968#ifdef CONFIG_IP_ROUTE_MULTIPATH
946 fib_sync_up(dev); 969 fib_sync_up(dev);
947#endif 970#endif
971 atomic_inc(&net->ipv4.dev_addr_genid);
948 rt_cache_flush(dev_net(dev), -1); 972 rt_cache_flush(dev_net(dev), -1);
949 break; 973 break;
950 case NETDEV_DOWN: 974 case NETDEV_DOWN:
951 fib_del_ifaddr(ifa); 975 fib_del_ifaddr(ifa, NULL);
976 atomic_inc(&net->ipv4.dev_addr_genid);
952 if (ifa->ifa_dev->ifa_list == NULL) { 977 if (ifa->ifa_dev->ifa_list == NULL) {
953 /* Last address was deleted from this interface. 978 /* Last address was deleted from this interface.
954 * Disable IP. 979 * Disable IP.
@@ -966,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
966{ 991{
967 struct net_device *dev = ptr; 992 struct net_device *dev = ptr;
968 struct in_device *in_dev = __in_dev_get_rtnl(dev); 993 struct in_device *in_dev = __in_dev_get_rtnl(dev);
994 struct net *net = dev_net(dev);
969 995
970 if (event == NETDEV_UNREGISTER) { 996 if (event == NETDEV_UNREGISTER) {
971 fib_disable_ip(dev, 2, -1); 997 fib_disable_ip(dev, 2, -1);
@@ -983,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
983#ifdef CONFIG_IP_ROUTE_MULTIPATH 1009#ifdef CONFIG_IP_ROUTE_MULTIPATH
984 fib_sync_up(dev); 1010 fib_sync_up(dev);
985#endif 1011#endif
1012 atomic_inc(&net->ipv4.dev_addr_genid);
986 rt_cache_flush(dev_net(dev), -1); 1013 rt_cache_flush(dev_net(dev), -1);
987 break; 1014 break;
988 case NETDEV_DOWN: 1015 case NETDEV_DOWN:
@@ -1041,6 +1068,7 @@ static void ip_fib_net_exit(struct net *net)
1041 fib4_rules_exit(net); 1068 fib4_rules_exit(net);
1042#endif 1069#endif
1043 1070
1071 rtnl_lock();
1044 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1072 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1045 struct fib_table *tb; 1073 struct fib_table *tb;
1046 struct hlist_head *head; 1074 struct hlist_head *head;
@@ -1053,6 +1081,7 @@ static void ip_fib_net_exit(struct net *net)
1053 fib_free_table(tb); 1081 fib_free_table(tb);
1054 } 1082 }
1055 } 1083 }
1084 rtnl_unlock();
1056 kfree(net->ipv4.fib_table_hash); 1085 kfree(net->ipv4.fib_table_hash);
1057} 1086}
1058 1087
@@ -1101,5 +1130,5 @@ void __init ip_fib_init(void)
1101 register_netdevice_notifier(&fib_netdev_notifier); 1130 register_netdevice_notifier(&fib_netdev_notifier);
1102 register_inetaddr_notifier(&fib_inetaddr_notifier); 1131 register_inetaddr_notifier(&fib_inetaddr_notifier);
1103 1132
1104 fib_hash_init(); 1133 fib_trie_init();
1105} 1134}