aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Ahern <dsa@cumulusnetworks.com> 2016-02-24 14:47:02 -0500
committer: David S. Miller <davem@davemloft.net> 2016-02-26 14:22:26 -0500
commit: 3f2fb9a834cb1fcddbae22deca7fde136944dc89 (patch)
tree: e1c9a118cd24d84d3b53d0ad959b5d0eda0c570d
parent: 8d3f2806f8fbd9b222b3504580b5eaa9ba2964b8 (diff)
net: l3mdev: address selection should only consider devices in L3 domain
David Lamparter noted a use case where the source address selection fails
to pick an address from a VRF interface - unnumbered interfaces.

Relevant commands from his script:

    ip addr add 9.9.9.9/32 dev lo
    ip link set lo up

    ip link add name vrf0 type vrf table 101
    ip rule add oif vrf0 table 101
    ip rule add iif vrf0 table 101
    ip link set vrf0 up
    ip addr add 10.0.0.3/32 dev vrf0

    ip link add name dummy2 type dummy
    ip link set dummy2 master vrf0 up

    --> note dummy2 has no address - unnumbered device

    ip route add 10.2.2.2/32 dev dummy2 table 101
    ip neigh add 10.2.2.2 dev dummy2 lladdr 02:00:00:00:00:02

    tcpdump -ni dummy2 &

And using ping instead of his socat example:

    $ ping -I vrf0 -c1 10.2.2.2
    ping: Warning: source address might be selected on device other than vrf0.
    PING 10.2.2.2 (10.2.2.2) from 9.9.9.9 vrf0: 56(84) bytes of data.

From tcpdump:

    12:57:29.449128 IP 9.9.9.9 > 10.2.2.2: ICMP echo request, id 2491, seq 1, length 64

Note the source address is from lo and is not a VRF local address.

With this patch:

    $ ping -I vrf0 -c1 10.2.2.2
    PING 10.2.2.2 (10.2.2.2) from 10.0.0.3 vrf0: 56(84) bytes of data.

From tcpdump:

    12:59:25.096426 IP 10.0.0.3 > 10.2.2.2: ICMP echo request, id 2113, seq 1, length 64

Now the source address comes from vrf0.

The ipv4 function for selecting source address takes a const argument.
Removing the const requires touching a lot of places, so instead
l3mdev_master_ifindex_rcu is changed to take a const argument and then
do the typecast to non-const as required by netdev_master_upper_dev_get_rcu.
This is similar to what l3mdev_fib_table_rcu does.

IPv6 for unnumbered interfaces appears to be selecting the addresses
properly.

Cc: David Lamparter <david@opensourcerouting.org>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/net/l3mdev.h |  4
-rw-r--r--  net/ipv4/devinet.c   |  5
-rw-r--r--  net/l3mdev/l3mdev.c  | 11
3 files changed, 16 insertions, 4 deletions
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 5567d46b3cff..c43a9c73de5e 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -39,7 +39,7 @@ struct l3mdev_ops {
39 39
40#ifdef CONFIG_NET_L3_MASTER_DEV 40#ifdef CONFIG_NET_L3_MASTER_DEV
41 41
42int l3mdev_master_ifindex_rcu(struct net_device *dev); 42int l3mdev_master_ifindex_rcu(const struct net_device *dev);
43static inline int l3mdev_master_ifindex(struct net_device *dev) 43static inline int l3mdev_master_ifindex(struct net_device *dev)
44{ 44{
45 int ifindex; 45 int ifindex;
@@ -179,7 +179,7 @@ struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net,
179 179
180#else 180#else
181 181
182static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) 182static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
183{ 183{
184 return 0; 184 return 0;
185} 185}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 29b8d3a7b19b..18d510fa7ee2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1194,6 +1194,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1194 __be32 addr = 0; 1194 __be32 addr = 0;
1195 struct in_device *in_dev; 1195 struct in_device *in_dev;
1196 struct net *net = dev_net(dev); 1196 struct net *net = dev_net(dev);
1197 int master_idx;
1197 1198
1198 rcu_read_lock(); 1199 rcu_read_lock();
1199 in_dev = __in_dev_get_rcu(dev); 1200 in_dev = __in_dev_get_rcu(dev);
@@ -1214,12 +1215,16 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1214 if (addr) 1215 if (addr)
1215 goto out_unlock; 1216 goto out_unlock;
1216no_in_dev: 1217no_in_dev:
1218 master_idx = l3mdev_master_ifindex_rcu(dev);
1217 1219
1218 /* Not loopback addresses on loopback should be preferred 1220 /* Not loopback addresses on loopback should be preferred
1219 in this case. It is important that lo is the first interface 1221 in this case. It is important that lo is the first interface
1220 in dev_base list. 1222 in dev_base list.
1221 */ 1223 */
1222 for_each_netdev_rcu(net, dev) { 1224 for_each_netdev_rcu(net, dev) {
1225 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1226 continue;
1227
1223 in_dev = __in_dev_get_rcu(dev); 1228 in_dev = __in_dev_get_rcu(dev);
1224 if (!in_dev) 1229 if (!in_dev)
1225 continue; 1230 continue;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8e5ead366e7f..e925037fa0df 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -17,7 +17,7 @@
17 * @dev: targeted interface 17 * @dev: targeted interface
18 */ 18 */
19 19
20int l3mdev_master_ifindex_rcu(struct net_device *dev) 20int l3mdev_master_ifindex_rcu(const struct net_device *dev)
21{ 21{
22 int ifindex = 0; 22 int ifindex = 0;
23 23
@@ -28,8 +28,15 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev)
28 ifindex = dev->ifindex; 28 ifindex = dev->ifindex;
29 } else if (netif_is_l3_slave(dev)) { 29 } else if (netif_is_l3_slave(dev)) {
30 struct net_device *master; 30 struct net_device *master;
31 struct net_device *_dev = (struct net_device *)dev;
31 32
32 master = netdev_master_upper_dev_get_rcu(dev); 33 /* netdev_master_upper_dev_get_rcu calls
34 * list_first_or_null_rcu to walk the upper dev list.
35 * list_first_or_null_rcu does not handle a const arg. We aren't
36 * making changes, just want the master device from that list so
37 * typecast to remove the const
38 */
39 master = netdev_master_upper_dev_get_rcu(_dev);
33 if (master) 40 if (master)
34 ifindex = master->ifindex; 41 ifindex = master->ifindex;
35 } 42 }