aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/fib_frontend.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/fib_frontend.c')
-rw-r--r--net/ipv4/fib_frontend.c227
1 files changed, 129 insertions, 98 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd43a878..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
44#include <net/arp.h> 44#include <net/arp.h>
45#include <net/ip_fib.h> 45#include <net/ip_fib.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/xfrm.h>
47 48
48#ifndef CONFIG_IP_MULTIPLE_TABLES 49#ifndef CONFIG_IP_MULTIPLE_TABLES
49 50
@@ -51,11 +52,11 @@ static int __net_init fib4_rules_init(struct net *net)
51{ 52{
52 struct fib_table *local_table, *main_table; 53 struct fib_table *local_table, *main_table;
53 54
54 local_table = fib_hash_table(RT_TABLE_LOCAL); 55 local_table = fib_trie_table(RT_TABLE_LOCAL);
55 if (local_table == NULL) 56 if (local_table == NULL)
56 return -ENOMEM; 57 return -ENOMEM;
57 58
58 main_table = fib_hash_table(RT_TABLE_MAIN); 59 main_table = fib_trie_table(RT_TABLE_MAIN);
59 if (main_table == NULL) 60 if (main_table == NULL)
60 goto fail; 61 goto fail;
61 62
@@ -82,7 +83,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
82 if (tb) 83 if (tb)
83 return tb; 84 return tb;
84 85
85 tb = fib_hash_table(id); 86 tb = fib_trie_table(id);
86 if (!tb) 87 if (!tb)
87 return NULL; 88 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1); 89 h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +115,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
114} 115}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 116#endif /* CONFIG_IP_MULTIPLE_TABLES */
116 117
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net) 118static void fib_flush(struct net *net)
133{ 119{
134 int flushed = 0; 120 int flushed = 0;
@@ -147,46 +133,6 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 133 rt_cache_flush(net, -1);
148} 134}
149 135
150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU, or RTNL
157 */
158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
159{
160 struct flowi fl = {
161 .fl4_dst = addr,
162 };
163 struct fib_result res = { 0 };
164 struct net_device *dev = NULL;
165 struct fib_table *local_table;
166
167#ifdef CONFIG_IP_MULTIPLE_TABLES
168 res.r = NULL;
169#endif
170
171 rcu_read_lock();
172 local_table = fib_get_table(net, RT_TABLE_LOCAL);
173 if (!local_table ||
174 fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
175 rcu_read_unlock();
176 return NULL;
177 }
178 if (res.type != RTN_LOCAL)
179 goto out;
180 dev = FIB_RES_DEV(res);
181
182 if (dev && devref)
183 dev_hold(dev);
184out:
185 rcu_read_unlock();
186 return dev;
187}
188EXPORT_SYMBOL(__ip_dev_find);
189
190/* 136/*
191 * Find address type as if only "dev" was present in the system. If 137 * Find address type as if only "dev" was present in the system. If
192 * on_dev is NULL then all interfaces are taken into consideration. 138 * on_dev is NULL then all interfaces are taken into consideration.
@@ -195,7 +141,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
195 const struct net_device *dev, 141 const struct net_device *dev,
196 __be32 addr) 142 __be32 addr)
197{ 143{
198 struct flowi fl = { .fl4_dst = addr }; 144 struct flowi4 fl4 = { .daddr = addr };
199 struct fib_result res; 145 struct fib_result res;
200 unsigned ret = RTN_BROADCAST; 146 unsigned ret = RTN_BROADCAST;
201 struct fib_table *local_table; 147 struct fib_table *local_table;
@@ -213,7 +159,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
213 if (local_table) { 159 if (local_table) {
214 ret = RTN_UNICAST; 160 ret = RTN_UNICAST;
215 rcu_read_lock(); 161 rcu_read_lock();
216 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { 162 if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
217 if (!dev || dev == res.fi->fib_dev) 163 if (!dev || dev == res.fi->fib_dev)
218 ret = res.type; 164 ret = res.type;
219 } 165 }
@@ -243,45 +189,48 @@ EXPORT_SYMBOL(inet_dev_addr_type);
243 * - check, that packet arrived from expected physical interface. 189 * - check, that packet arrived from expected physical interface.
244 * called with rcu_read_lock() 190 * called with rcu_read_lock()
245 */ 191 */
246int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 192int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
247 struct net_device *dev, __be32 *spec_dst, 193 int oif, struct net_device *dev, __be32 *spec_dst,
248 u32 *itag, u32 mark) 194 u32 *itag)
249{ 195{
250 struct in_device *in_dev; 196 struct in_device *in_dev;
251 struct flowi fl = { 197 struct flowi4 fl4;
252 .fl4_dst = src,
253 .fl4_src = dst,
254 .fl4_tos = tos,
255 .mark = mark,
256 .iif = oif
257 };
258 struct fib_result res; 198 struct fib_result res;
259 int no_addr, rpf, accept_local; 199 int no_addr, rpf, accept_local;
260 bool dev_match; 200 bool dev_match;
261 int ret; 201 int ret;
262 struct net *net; 202 struct net *net;
263 203
204 fl4.flowi4_oif = 0;
205 fl4.flowi4_iif = oif;
206 fl4.daddr = src;
207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos;
209 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
210
264 no_addr = rpf = accept_local = 0; 211 no_addr = rpf = accept_local = 0;
265 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
266 if (in_dev) { 213 if (in_dev) {
267 no_addr = in_dev->ifa_list == NULL; 214 no_addr = in_dev->ifa_list == NULL;
268 rpf = IN_DEV_RPFILTER(in_dev); 215
216 /* Ignore rp_filter for packets protected by IPsec. */
217 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
218
269 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 219 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
270 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 220 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
271 fl.mark = 0;
272 } 221 }
273 222
274 if (in_dev == NULL) 223 if (in_dev == NULL)
275 goto e_inval; 224 goto e_inval;
276 225
277 net = dev_net(dev); 226 net = dev_net(dev);
278 if (fib_lookup(net, &fl, &res)) 227 if (fib_lookup(net, &fl4, &res))
279 goto last_resort; 228 goto last_resort;
280 if (res.type != RTN_UNICAST) { 229 if (res.type != RTN_UNICAST) {
281 if (res.type != RTN_LOCAL || !accept_local) 230 if (res.type != RTN_LOCAL || !accept_local)
282 goto e_inval; 231 goto e_inval;
283 } 232 }
284 *spec_dst = FIB_RES_PREFSRC(res); 233 *spec_dst = FIB_RES_PREFSRC(net, res);
285 fib_combine_itag(itag, &res); 234 fib_combine_itag(itag, &res);
286 dev_match = false; 235 dev_match = false;
287 236
@@ -306,12 +255,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
306 goto last_resort; 255 goto last_resort;
307 if (rpf == 1) 256 if (rpf == 1)
308 goto e_rpf; 257 goto e_rpf;
309 fl.oif = dev->ifindex; 258 fl4.flowi4_oif = dev->ifindex;
310 259
311 ret = 0; 260 ret = 0;
312 if (fib_lookup(net, &fl, &res) == 0) { 261 if (fib_lookup(net, &fl4, &res) == 0) {
313 if (res.type == RTN_UNICAST) { 262 if (res.type == RTN_UNICAST) {
314 *spec_dst = FIB_RES_PREFSRC(res); 263 *spec_dst = FIB_RES_PREFSRC(net, res);
315 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 264 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
316 } 265 }
317 } 266 }
@@ -775,12 +724,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
775 } 724 }
776} 725}
777 726
778static void fib_del_ifaddr(struct in_ifaddr *ifa) 727/* Delete primary or secondary address.
728 * Optionally, on secondary address promotion consider the addresses
729 * from subnet iprim as deleted, even if they are in device list.
730 * In this case the secondary ifa can be in device list.
731 */
732void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
779{ 733{
780 struct in_device *in_dev = ifa->ifa_dev; 734 struct in_device *in_dev = ifa->ifa_dev;
781 struct net_device *dev = in_dev->dev; 735 struct net_device *dev = in_dev->dev;
782 struct in_ifaddr *ifa1; 736 struct in_ifaddr *ifa1;
783 struct in_ifaddr *prim = ifa; 737 struct in_ifaddr *prim = ifa, *prim1 = NULL;
784 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; 738 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
785 __be32 any = ifa->ifa_address & ifa->ifa_mask; 739 __be32 any = ifa->ifa_address & ifa->ifa_mask;
786#define LOCAL_OK 1 740#define LOCAL_OK 1
@@ -788,17 +742,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
788#define BRD0_OK 4 742#define BRD0_OK 4
789#define BRD1_OK 8 743#define BRD1_OK 8
790 unsigned ok = 0; 744 unsigned ok = 0;
745 int subnet = 0; /* Primary network */
746 int gone = 1; /* Address is missing */
747 int same_prefsrc = 0; /* Another primary with same IP */
791 748
792 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) 749 if (ifa->ifa_flags & IFA_F_SECONDARY) {
793 fib_magic(RTM_DELROUTE,
794 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
795 any, ifa->ifa_prefixlen, prim);
796 else {
797 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 750 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
798 if (prim == NULL) { 751 if (prim == NULL) {
799 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 752 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
800 return; 753 return;
801 } 754 }
755 if (iprim && iprim != prim) {
756 printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
757 return;
758 }
759 } else if (!ipv4_is_zeronet(any) &&
760 (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
761 fib_magic(RTM_DELROUTE,
762 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
763 any, ifa->ifa_prefixlen, prim);
764 subnet = 1;
802 } 765 }
803 766
804 /* Deletion is more complicated than add. 767 /* Deletion is more complicated than add.
@@ -808,6 +771,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
808 */ 771 */
809 772
810 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 773 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
774 if (ifa1 == ifa) {
775 /* promotion, keep the IP */
776 gone = 0;
777 continue;
778 }
779 /* Ignore IFAs from our subnet */
780 if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
781 inet_ifa_match(ifa1->ifa_address, iprim))
782 continue;
783
784 /* Ignore ifa1 if it uses different primary IP (prefsrc) */
785 if (ifa1->ifa_flags & IFA_F_SECONDARY) {
786 /* Another address from our subnet? */
787 if (ifa1->ifa_mask == prim->ifa_mask &&
788 inet_ifa_match(ifa1->ifa_address, prim))
789 prim1 = prim;
790 else {
791 /* We reached the secondaries, so
792 * same_prefsrc should be determined.
793 */
794 if (!same_prefsrc)
795 continue;
796 /* Search new prim1 if ifa1 is not
797 * using the current prim1
798 */
799 if (!prim1 ||
800 ifa1->ifa_mask != prim1->ifa_mask ||
801 !inet_ifa_match(ifa1->ifa_address, prim1))
802 prim1 = inet_ifa_byprefix(in_dev,
803 ifa1->ifa_address,
804 ifa1->ifa_mask);
805 if (!prim1)
806 continue;
807 if (prim1->ifa_local != prim->ifa_local)
808 continue;
809 }
810 } else {
811 if (prim->ifa_local != ifa1->ifa_local)
812 continue;
813 prim1 = ifa1;
814 if (prim != prim1)
815 same_prefsrc = 1;
816 }
811 if (ifa->ifa_local == ifa1->ifa_local) 817 if (ifa->ifa_local == ifa1->ifa_local)
812 ok |= LOCAL_OK; 818 ok |= LOCAL_OK;
813 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 819 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -816,19 +822,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
816 ok |= BRD1_OK; 822 ok |= BRD1_OK;
817 if (any == ifa1->ifa_broadcast) 823 if (any == ifa1->ifa_broadcast)
818 ok |= BRD0_OK; 824 ok |= BRD0_OK;
825 /* primary has network specific broadcasts */
826 if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
827 __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
828 __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
829
830 if (!ipv4_is_zeronet(any1)) {
831 if (ifa->ifa_broadcast == brd1 ||
832 ifa->ifa_broadcast == any1)
833 ok |= BRD_OK;
834 if (brd == brd1 || brd == any1)
835 ok |= BRD1_OK;
836 if (any == brd1 || any == any1)
837 ok |= BRD0_OK;
838 }
839 }
819 } 840 }
820 841
821 if (!(ok & BRD_OK)) 842 if (!(ok & BRD_OK))
822 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 843 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
823 if (!(ok & BRD1_OK)) 844 if (subnet && ifa->ifa_prefixlen < 31) {
824 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 845 if (!(ok & BRD1_OK))
825 if (!(ok & BRD0_OK)) 846 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
826 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 847 if (!(ok & BRD0_OK))
848 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
849 }
827 if (!(ok & LOCAL_OK)) { 850 if (!(ok & LOCAL_OK)) {
828 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 851 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
829 852
830 /* Check, that this local address finally disappeared. */ 853 /* Check, that this local address finally disappeared. */
831 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 854 if (gone &&
855 inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
832 /* And the last, but not the least thing. 856 /* And the last, but not the least thing.
833 * We must flush stray FIB entries. 857 * We must flush stray FIB entries.
834 * 858 *
@@ -849,11 +873,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
849{ 873{
850 874
851 struct fib_result res; 875 struct fib_result res;
852 struct flowi fl = { 876 struct flowi4 fl4 = {
853 .mark = frn->fl_mark, 877 .flowi4_mark = frn->fl_mark,
854 .fl4_dst = frn->fl_addr, 878 .daddr = frn->fl_addr,
855 .fl4_tos = frn->fl_tos, 879 .flowi4_tos = frn->fl_tos,
856 .fl4_scope = frn->fl_scope, 880 .flowi4_scope = frn->fl_scope,
857 }; 881 };
858 882
859#ifdef CONFIG_IP_MULTIPLE_TABLES 883#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -866,7 +890,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
866 890
867 frn->tb_id = tb->tb_id; 891 frn->tb_id = tb->tb_id;
868 rcu_read_lock(); 892 rcu_read_lock();
869 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); 893 frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
870 894
871 if (!frn->err) { 895 if (!frn->err) {
872 frn->prefixlen = res.prefixlen; 896 frn->prefixlen = res.prefixlen;
@@ -938,6 +962,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
938{ 962{
939 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 963 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
940 struct net_device *dev = ifa->ifa_dev->dev; 964 struct net_device *dev = ifa->ifa_dev->dev;
965 struct net *net = dev_net(dev);
941 966
942 switch (event) { 967 switch (event) {
943 case NETDEV_UP: 968 case NETDEV_UP:
@@ -945,10 +970,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
945#ifdef CONFIG_IP_ROUTE_MULTIPATH 970#ifdef CONFIG_IP_ROUTE_MULTIPATH
946 fib_sync_up(dev); 971 fib_sync_up(dev);
947#endif 972#endif
973 atomic_inc(&net->ipv4.dev_addr_genid);
948 rt_cache_flush(dev_net(dev), -1); 974 rt_cache_flush(dev_net(dev), -1);
949 break; 975 break;
950 case NETDEV_DOWN: 976 case NETDEV_DOWN:
951 fib_del_ifaddr(ifa); 977 fib_del_ifaddr(ifa, NULL);
978 atomic_inc(&net->ipv4.dev_addr_genid);
952 if (ifa->ifa_dev->ifa_list == NULL) { 979 if (ifa->ifa_dev->ifa_list == NULL) {
953 /* Last address was deleted from this interface. 980 /* Last address was deleted from this interface.
954 * Disable IP. 981 * Disable IP.
@@ -966,6 +993,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
966{ 993{
967 struct net_device *dev = ptr; 994 struct net_device *dev = ptr;
968 struct in_device *in_dev = __in_dev_get_rtnl(dev); 995 struct in_device *in_dev = __in_dev_get_rtnl(dev);
996 struct net *net = dev_net(dev);
969 997
970 if (event == NETDEV_UNREGISTER) { 998 if (event == NETDEV_UNREGISTER) {
971 fib_disable_ip(dev, 2, -1); 999 fib_disable_ip(dev, 2, -1);
@@ -983,6 +1011,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
983#ifdef CONFIG_IP_ROUTE_MULTIPATH 1011#ifdef CONFIG_IP_ROUTE_MULTIPATH
984 fib_sync_up(dev); 1012 fib_sync_up(dev);
985#endif 1013#endif
1014 atomic_inc(&net->ipv4.dev_addr_genid);
986 rt_cache_flush(dev_net(dev), -1); 1015 rt_cache_flush(dev_net(dev), -1);
987 break; 1016 break;
988 case NETDEV_DOWN: 1017 case NETDEV_DOWN:
@@ -1041,6 +1070,7 @@ static void ip_fib_net_exit(struct net *net)
1041 fib4_rules_exit(net); 1070 fib4_rules_exit(net);
1042#endif 1071#endif
1043 1072
1073 rtnl_lock();
1044 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1074 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1045 struct fib_table *tb; 1075 struct fib_table *tb;
1046 struct hlist_head *head; 1076 struct hlist_head *head;
@@ -1053,6 +1083,7 @@ static void ip_fib_net_exit(struct net *net)
1053 fib_free_table(tb); 1083 fib_free_table(tb);
1054 } 1084 }
1055 } 1085 }
1086 rtnl_unlock();
1056 kfree(net->ipv4.fib_table_hash); 1087 kfree(net->ipv4.fib_table_hash);
1057} 1088}
1058 1089
@@ -1101,5 +1132,5 @@ void __init ip_fib_init(void)
1101 register_netdevice_notifier(&fib_netdev_notifier); 1132 register_netdevice_notifier(&fib_netdev_notifier);
1102 register_inetaddr_notifier(&fib_inetaddr_notifier); 1133 register_inetaddr_notifier(&fib_inetaddr_notifier);
1103 1134
1104 fib_hash_init(); 1135 fib_trie_init();
1105} 1136}