summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Wei Wang <weiwan@google.com> 2019-06-20 20:36:41 -0400
committer: David S. Miller <davem@davemloft.net> 2019-06-23 16:24:17 -0400
commit: 7d9e5f422150ed00de744e02a80734d74cc9704d (patch)
tree: 5649667efad829c536543d0feb5374ad42b93483
parent: 67f415dd29063a5906c560051c00e42dcf01a4dd (diff)
ipv6: convert major tx path to use RT6_LOOKUP_F_DST_NOREF
For the tx path, in most cases, we still have to take a refcnt on the dst because the caller is caching the dst somewhere. But it is still beneficial to make use of the RT6_LOOKUP_F_DST_NOREF flag while doing the route lookup. This is because the flag prevents manipulating the refcnt on net->ipv6.ip6_null_entry when doing fib6_rule_lookup() to traverse each routing table. The null_entry is a shared object and constant updates on it cause false sharing.

We converted the current major lookup function ip6_route_output_flags() to make use of RT6_LOOKUP_F_DST_NOREF.

Together with the change in the rx path, we see a noticeable performance boost:
I ran synflood tests between 2 hosts under the same switch. Both hosts have a 20G mlx NIC and 8 tx/rx queues.
The sender sends a pure SYN flood with random src IPs and ports using trafgen.
The receiver has a simple TCP listener on the target port.
Both hosts have multiple custom rules:
- For incoming packets, only the local table is traversed.
- For outgoing packets, 3 tables are traversed to find the route.
The packet processing rate on the receiver is as follows:
- Before the fix: 3.78Mpps
- After the fix: 5.50Mpps

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/vrf.c 5
-rw-r--r-- include/net/ip6_route.h 4
-rw-r--r-- net/ipv6/route.c 29
-rw-r--r-- net/l3mdev/l3mdev.c 7
4 files changed, 37 insertions, 8 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 11b9525dff27..69ef9cce5858 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1072,12 +1072,14 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
1072#if IS_ENABLED(CONFIG_IPV6) 1072#if IS_ENABLED(CONFIG_IPV6)
1073/* send to link-local or multicast address via interface enslaved to 1073/* send to link-local or multicast address via interface enslaved to
1074 * VRF device. Force lookup to VRF table without changing flow struct 1074 * VRF device. Force lookup to VRF table without changing flow struct
1075 * Note: Caller to this function must hold rcu_read_lock() and no refcnt
1076 * is taken on the dst by this function.
1075 */ 1077 */
1076static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, 1078static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
1077 struct flowi6 *fl6) 1079 struct flowi6 *fl6)
1078{ 1080{
1079 struct net *net = dev_net(dev); 1081 struct net *net = dev_net(dev);
1080 int flags = RT6_LOOKUP_F_IFACE; 1082 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
1081 struct dst_entry *dst = NULL; 1083 struct dst_entry *dst = NULL;
1082 struct rt6_info *rt; 1084 struct rt6_info *rt;
1083 1085
@@ -1087,7 +1089,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
1087 */ 1089 */
1088 if (fl6->flowi6_oif == dev->ifindex) { 1090 if (fl6->flowi6_oif == dev->ifindex) {
1089 dst = &net->ipv6.ip6_null_entry->dst; 1091 dst = &net->ipv6.ip6_null_entry->dst;
1090 dst_hold(dst);
1091 return dst; 1092 return dst;
1092 } 1093 }
1093 1094
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 0709835c01ad..89ad7917b98d 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -84,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
84 struct flowi6 *fl6, 84 struct flowi6 *fl6,
85 const struct sk_buff *skb, int flags); 85 const struct sk_buff *skb, int flags);
86 86
87struct dst_entry *ip6_route_output_flags_noref(struct net *net,
88 const struct sock *sk,
89 struct flowi6 *fl6, int flags);
90
87struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 91struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
88 struct flowi6 *fl6, int flags); 92 struct flowi6 *fl6, int flags);
89 93
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 66fc69ef5909..3975ae8e2440 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2415,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
2415 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); 2415 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2416} 2416}
2417 2417
2418struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 2418struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2419 struct flowi6 *fl6, int flags) 2419 const struct sock *sk,
2420 struct flowi6 *fl6, int flags)
2420{ 2421{
2421 bool any_src; 2422 bool any_src;
2422 2423
@@ -2424,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2424 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { 2425 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2425 struct dst_entry *dst; 2426 struct dst_entry *dst;
2426 2427
2428 /* This function does not take refcnt on the dst */
2427 dst = l3mdev_link_scope_lookup(net, fl6); 2429 dst = l3mdev_link_scope_lookup(net, fl6);
2428 if (dst) 2430 if (dst)
2429 return dst; 2431 return dst;
@@ -2431,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2431 2433
2432 fl6->flowi6_iif = LOOPBACK_IFINDEX; 2434 fl6->flowi6_iif = LOOPBACK_IFINDEX;
2433 2435
2436 flags |= RT6_LOOKUP_F_DST_NOREF;
2434 any_src = ipv6_addr_any(&fl6->saddr); 2437 any_src = ipv6_addr_any(&fl6->saddr);
2435 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 2438 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2436 (fl6->flowi6_oif && any_src)) 2439 (fl6->flowi6_oif && any_src))
@@ -2443,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2443 2446
2444 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); 2447 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2445} 2448}
2449EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2450
2451struct dst_entry *ip6_route_output_flags(struct net *net,
2452 const struct sock *sk,
2453 struct flowi6 *fl6,
2454 int flags)
2455{
2456 struct dst_entry *dst;
2457 struct rt6_info *rt6;
2458
2459 rcu_read_lock();
2460 dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2461 rt6 = (struct rt6_info *)dst;
2462 /* For dst cached in uncached_list, refcnt is already taken. */
2463 if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2464 dst = &net->ipv6.ip6_null_entry->dst;
2465 dst_hold(dst);
2466 }
2467 rcu_read_unlock();
2468
2469 return dst;
2470}
2446EXPORT_SYMBOL_GPL(ip6_route_output_flags); 2471EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2447 2472
2448struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2473struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index cfc9fcb97465..f35899d45a9a 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
118 * local and multicast addresses 118 * local and multicast addresses
119 * @net: network namespace for device index lookup 119 * @net: network namespace for device index lookup
120 * @fl6: IPv6 flow struct for lookup 120 * @fl6: IPv6 flow struct for lookup
121 * This function does not hold refcnt on the returned dst.
122 * Caller must hold rcu_read_lock().
121 */ 123 */
122 124
123struct dst_entry *l3mdev_link_scope_lookup(struct net *net, 125struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
@@ -126,9 +128,8 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
126 struct dst_entry *dst = NULL; 128 struct dst_entry *dst = NULL;
127 struct net_device *dev; 129 struct net_device *dev;
128 130
131 WARN_ON_ONCE(!rcu_read_lock_held());
129 if (fl6->flowi6_oif) { 132 if (fl6->flowi6_oif) {
130 rcu_read_lock();
131
132 dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); 133 dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
133 if (dev && netif_is_l3_slave(dev)) 134 if (dev && netif_is_l3_slave(dev))
134 dev = netdev_master_upper_dev_get_rcu(dev); 135 dev = netdev_master_upper_dev_get_rcu(dev);
@@ -136,8 +137,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
136 if (dev && netif_is_l3_master(dev) && 137 if (dev && netif_is_l3_master(dev) &&
137 dev->l3mdev_ops->l3mdev_link_scope_lookup) 138 dev->l3mdev_ops->l3mdev_link_scope_lookup)
138 dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); 139 dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
139
140 rcu_read_unlock();
141 } 140 }
142 141
143 return dst; 142 return dst;