aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paolo Abeni <pabeni@redhat.com> 2017-09-28 09:51:37 -0400
committer: David S. Miller <davem@davemloft.net> 2017-09-30 22:55:47 -0400
commit: bc044e8db7962e727a75b591b9851ff2ac5cf846 (patch)
tree: e5cfa29af9e217039d26148e80b6b4546f39494f
parent: 7487449c86c65202b3b725c4524cb48dd65e4e6f (diff)
udp: perform source validation for mcast early demux
The UDP early demux can leverage the rx dst cache even for multicast unconnected sockets.

In such a scenario the ipv4 source address is validated only on the first packet in the given flow. After that, when we fetch the dst entry from the socket rx cache, we stop enforcing the rp_filter and we even start accepting any kind of martian addresses.

Disabling the dst cache for unconnected multicast sockets will cause a large performance regression, nearly reducing by half the max ingress tput.

Instead we factor out a route helper to completely validate an skb source address for multicast packets and we call it from the UDP early demux for mcast packets landing on unconnected sockets, after successfully fetching the related cached dst entry.

This still gives a measurable, but limited performance regression:

                    rp_filter = 0    rp_filter = 1
    edmux disabled: 1182 Kpps        1127 Kpps
    edmux before:   2238 Kpps        2238 Kpps
    edmux after:    2037 Kpps        2019 Kpps

The above figures are on top of current net tree. Applying the net-next commit 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") the delta with rp_filter == 0 will decrease even more.

Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/net/route.h   4
-rw-r--r--  net/ipv4/route.c     46
-rw-r--r--  net/ipv4/udp.c       13
3 files changed, 41 insertions, 22 deletions
diff --git a/include/net/route.h b/include/net/route.h
index 57dfc6850d37..d538e6db1afe 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
175 fl4->fl4_gre_key = gre_key; 175 fl4->fl4_gre_key = gre_key;
176 return ip_route_output_key(net, fl4); 176 return ip_route_output_key(net, fl4);
177} 177}
178 178int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
179 u8 tos, struct net_device *dev,
180 struct in_device *in_dev, u32 *itag);
179int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, 181int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
180 u8 tos, struct net_device *devin); 182 u8 tos, struct net_device *devin);
181int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, 183int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94d4cd2d5ea4..ac6fde5d45f1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
1520EXPORT_SYMBOL(rt_dst_alloc); 1520EXPORT_SYMBOL(rt_dst_alloc);
1521 1521
1522/* called in rcu_read_lock() section */ 1522/* called in rcu_read_lock() section */
1523static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1523int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1524 u8 tos, struct net_device *dev, int our) 1524 u8 tos, struct net_device *dev,
1525 struct in_device *in_dev, u32 *itag)
1525{ 1526{
1526 struct rtable *rth;
1527 struct in_device *in_dev = __in_dev_get_rcu(dev);
1528 unsigned int flags = RTCF_MULTICAST;
1529 u32 itag = 0;
1530 int err; 1527 int err;
1531 1528
1532 /* Primary sanity checks. */ 1529 /* Primary sanity checks. */
1533
1534 if (!in_dev) 1530 if (!in_dev)
1535 return -EINVAL; 1531 return -EINVAL;
1536 1532
1537 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1533 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1538 skb->protocol != htons(ETH_P_IP)) 1534 skb->protocol != htons(ETH_P_IP))
1539 goto e_inval; 1535 return -EINVAL;
1540 1536
1541 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) 1537 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1542 goto e_inval; 1538 return -EINVAL;
1543 1539
1544 if (ipv4_is_zeronet(saddr)) { 1540 if (ipv4_is_zeronet(saddr)) {
1545 if (!ipv4_is_local_multicast(daddr)) 1541 if (!ipv4_is_local_multicast(daddr))
1546 goto e_inval; 1542 return -EINVAL;
1547 } else { 1543 } else {
1548 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 1544 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1549 in_dev, &itag); 1545 in_dev, itag);
1550 if (err < 0) 1546 if (err < 0)
1551 goto e_err; 1547 return err;
1552 } 1548 }
1549 return 0;
1550}
1551
1552/* called in rcu_read_lock() section */
1553static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1554 u8 tos, struct net_device *dev, int our)
1555{
1556 struct in_device *in_dev = __in_dev_get_rcu(dev);
1557 unsigned int flags = RTCF_MULTICAST;
1558 struct rtable *rth;
1559 u32 itag = 0;
1560 int err;
1561
1562 err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1563 if (err)
1564 return err;
1565
1553 if (our) 1566 if (our)
1554 flags |= RTCF_LOCAL; 1567 flags |= RTCF_LOCAL;
1555 1568
1556 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, 1569 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
1557 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); 1570 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1558 if (!rth) 1571 if (!rth)
1559 goto e_nobufs; 1572 return -ENOBUFS;
1560 1573
1561#ifdef CONFIG_IP_ROUTE_CLASSID 1574#ifdef CONFIG_IP_ROUTE_CLASSID
1562 rth->dst.tclassid = itag; 1575 rth->dst.tclassid = itag;
@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1572 1585
1573 skb_dst_set(skb, &rth->dst); 1586 skb_dst_set(skb, &rth->dst);
1574 return 0; 1587 return 0;
1575
1576e_nobufs:
1577 return -ENOBUFS;
1578e_inval:
1579 return -EINVAL;
1580e_err:
1581 return err;
1582} 1588}
1583 1589
1584 1590
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9b30f821fe96..5676237d2b0f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2224,6 +2224,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
2224int udp_v4_early_demux(struct sk_buff *skb) 2224int udp_v4_early_demux(struct sk_buff *skb)
2225{ 2225{
2226 struct net *net = dev_net(skb->dev); 2226 struct net *net = dev_net(skb->dev);
2227 struct in_device *in_dev = NULL;
2227 const struct iphdr *iph; 2228 const struct iphdr *iph;
2228 const struct udphdr *uh; 2229 const struct udphdr *uh;
2229 struct sock *sk = NULL; 2230 struct sock *sk = NULL;
@@ -2241,7 +2242,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
2241 2242
2242 if (skb->pkt_type == PACKET_BROADCAST || 2243 if (skb->pkt_type == PACKET_BROADCAST ||
2243 skb->pkt_type == PACKET_MULTICAST) { 2244 skb->pkt_type == PACKET_MULTICAST) {
2244 struct in_device *in_dev = __in_dev_get_rcu(skb->dev); 2245 in_dev = __in_dev_get_rcu(skb->dev);
2245 2246
2246 if (!in_dev) 2247 if (!in_dev)
2247 return 0; 2248 return 0;
@@ -2272,11 +2273,21 @@ int udp_v4_early_demux(struct sk_buff *skb)
2272 if (dst) 2273 if (dst)
2273 dst = dst_check(dst, 0); 2274 dst = dst_check(dst, 0);
2274 if (dst) { 2275 if (dst) {
2276 u32 itag = 0;
2277
2275 /* set noref for now. 2278 /* set noref for now.
2276 * any place which wants to hold dst has to call 2279 * any place which wants to hold dst has to call
2277 * dst_hold_safe() 2280 * dst_hold_safe()
2278 */ 2281 */
2279 skb_dst_set_noref(skb, dst); 2282 skb_dst_set_noref(skb, dst);
2283
2284 /* for unconnected multicast sockets we need to validate
2285 * the source on each packet
2286 */
2287 if (!inet_sk(sk)->inet_daddr && in_dev)
2288 return ip_mc_validate_source(skb, iph->daddr,
2289 iph->saddr, iph->tos,
2290 skb->dev, in_dev, &itag);
2280 } 2291 }
2281 return 0; 2292 return 0;
2282} 2293}