aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-07-26 07:14:38 -0400
committer David S. Miller <davem@davemloft.net> 2012-07-26 18:50:39 -0400
commit c6cffba4ffa26a8ffacd0bb9f3144e34f20da7de (patch)
tree b67532a74343d42bcf8784b8e32d7cf6d69313db /net/ipv4
parent 4487e64de63b8e42efe5a5543871c42c5a5859d9 (diff)
ipv4: Fix input route performance regression.
With the routing cache removal we lost the "noref" code paths on input, and this can kill some routing workloads.

Reinstate the noref path when we hit a cached route in the FIB nexthops.

With help from Eric Dumazet.

Reported-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/arp.c 2
-rw-r--r-- net/ipv4/fib_semantics.c 4
-rw-r--r-- net/ipv4/ip_fragment.c 4
-rw-r--r-- net/ipv4/ip_input.c 4
-rw-r--r-- net/ipv4/route.c 48
-rw-r--r-- net/ipv4/xfrm4_input.c 4
6 files changed, 31 insertions, 35 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a0124eb7dbea..77e87aff419a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb)
827 } 827 }
828 828
829 if (arp->ar_op == htons(ARPOP_REQUEST) && 829 if (arp->ar_op == htons(ARPOP_REQUEST) &&
830 ip_route_input(skb, tip, sip, 0, dev) == 0) { 830 ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
831 831
832 rt = skb_rtable(skb); 832 rt = skb_rtable(skb);
833 addr_type = rt->rt_type; 833 addr_type = rt->rt_type;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e55171f184f9..da0cc2e6b250 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -172,9 +172,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
172 if (nexthop_nh->nh_exceptions) 172 if (nexthop_nh->nh_exceptions)
173 free_nh_exceptions(nexthop_nh); 173 free_nh_exceptions(nexthop_nh);
174 if (nexthop_nh->nh_rth_output) 174 if (nexthop_nh->nh_rth_output)
175 dst_release(&nexthop_nh->nh_rth_output->dst); 175 dst_free(&nexthop_nh->nh_rth_output->dst);
176 if (nexthop_nh->nh_rth_input) 176 if (nexthop_nh->nh_rth_input)
177 dst_release(&nexthop_nh->nh_rth_input->dst); 177 dst_free(&nexthop_nh->nh_rth_input->dst);
178 } endfor_nexthops(fi); 178 } endfor_nexthops(fi);
179 179
180 release_net(fi->fib_net); 180 release_net(fi->fib_net);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7ad88e5e7110..8d07c973409c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg)
258 /* skb dst is stale, drop it, and perform route lookup again */ 258 /* skb dst is stale, drop it, and perform route lookup again */
259 skb_dst_drop(head); 259 skb_dst_drop(head);
260 iph = ip_hdr(head); 260 iph = ip_hdr(head);
261 err = ip_route_input(head, iph->daddr, iph->saddr, 261 err = ip_route_input_noref(head, iph->daddr, iph->saddr,
262 iph->tos, head->dev); 262 iph->tos, head->dev);
263 if (err) 263 if (err)
264 goto out_rcu_unlock; 264 goto out_rcu_unlock;
265 265
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 93134b0eab0c..bda8cac2ae91 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -339,8 +339,8 @@ static int ip_rcv_finish(struct sk_buff *skb)
339 * how the packet travels inside Linux networking. 339 * how the packet travels inside Linux networking.
340 */ 340 */
341 if (!skb_dst(skb)) { 341 if (!skb_dst(skb)) {
342 int err = ip_route_input(skb, iph->daddr, iph->saddr, 342 int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
343 iph->tos, skb->dev); 343 iph->tos, skb->dev);
344 if (unlikely(err)) { 344 if (unlikely(err)) {
345 if (err == -EXDEV) 345 if (err == -EXDEV)
346 NET_INC_STATS_BH(dev_net(skb->dev), 346 NET_INC_STATS_BH(dev_net(skb->dev),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3f7bb7185c50..fc1a81ca79a7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1199,10 +1199,9 @@ restart:
1199 fnhe->fnhe_stamp = jiffies; 1199 fnhe->fnhe_stamp = jiffies;
1200} 1200}
1201 1201
1202static inline void rt_release_rcu(struct rcu_head *head) 1202static inline void rt_free(struct rtable *rt)
1203{ 1203{
1204 struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); 1204 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
1205 dst_release(dst);
1206} 1205}
1207 1206
1208static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
@@ -1216,9 +1215,15 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1216 1215
1217 prev = cmpxchg(p, orig, rt); 1216 prev = cmpxchg(p, orig, rt);
1218 if (prev == orig) { 1217 if (prev == orig) {
1219 dst_clone(&rt->dst);
1220 if (orig) 1218 if (orig)
1221 call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); 1219 rt_free(orig);
1220 } else {
1221 /* Routes we intend to cache in the FIB nexthop have
1222 * the DST_NOCACHE bit clear. However, if we are
1223 * unsuccessful at storing this route into the cache
1224 * we really need to set it.
1225 */
1226 rt->dst.flags |= DST_NOCACHE;
1222 } 1227 }
1223} 1228}
1224 1229
@@ -1245,7 +1250,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1245#ifdef CONFIG_IP_ROUTE_CLASSID 1250#ifdef CONFIG_IP_ROUTE_CLASSID
1246 rt->dst.tclassid = nh->nh_tclassid; 1251 rt->dst.tclassid = nh->nh_tclassid;
1247#endif 1252#endif
1248 if (!(rt->dst.flags & DST_HOST)) 1253 if (!(rt->dst.flags & DST_NOCACHE))
1249 rt_cache_route(nh, rt); 1254 rt_cache_route(nh, rt);
1250 } 1255 }
1251 1256
@@ -1261,7 +1266,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
1261 bool nopolicy, bool noxfrm, bool will_cache) 1266 bool nopolicy, bool noxfrm, bool will_cache)
1262{ 1267{
1263 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 1268 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1264 (will_cache ? 0 : DST_HOST) | DST_NOCACHE | 1269 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
1265 (nopolicy ? DST_NOPOLICY : 0) | 1270 (nopolicy ? DST_NOPOLICY : 0) |
1266 (noxfrm ? DST_NOXFRM : 0)); 1271 (noxfrm ? DST_NOXFRM : 0));
1267} 1272}
@@ -1366,8 +1371,7 @@ static void ip_handle_martian_source(struct net_device *dev,
1366static int __mkroute_input(struct sk_buff *skb, 1371static int __mkroute_input(struct sk_buff *skb,
1367 const struct fib_result *res, 1372 const struct fib_result *res,
1368 struct in_device *in_dev, 1373 struct in_device *in_dev,
1369 __be32 daddr, __be32 saddr, u32 tos, 1374 __be32 daddr, __be32 saddr, u32 tos)
1370 struct rtable **result)
1371{ 1375{
1372 struct rtable *rth; 1376 struct rtable *rth;
1373 int err; 1377 int err;
@@ -1418,7 +1422,7 @@ static int __mkroute_input(struct sk_buff *skb,
1418 if (!itag) { 1422 if (!itag) {
1419 rth = FIB_RES_NH(*res).nh_rth_input; 1423 rth = FIB_RES_NH(*res).nh_rth_input;
1420 if (rt_cache_valid(rth)) { 1424 if (rt_cache_valid(rth)) {
1421 dst_hold(&rth->dst); 1425 skb_dst_set_noref(skb, &rth->dst);
1422 goto out; 1426 goto out;
1423 } 1427 }
1424 do_cache = true; 1428 do_cache = true;
@@ -1445,8 +1449,8 @@ static int __mkroute_input(struct sk_buff *skb,
1445 rth->dst.output = ip_output; 1449 rth->dst.output = ip_output;
1446 1450
1447 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); 1451 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
1452 skb_dst_set(skb, &rth->dst);
1448out: 1453out:
1449 *result = rth;
1450 err = 0; 1454 err = 0;
1451 cleanup: 1455 cleanup:
1452 return err; 1456 return err;
@@ -1458,21 +1462,13 @@ static int ip_mkroute_input(struct sk_buff *skb,
1458 struct in_device *in_dev, 1462 struct in_device *in_dev,
1459 __be32 daddr, __be32 saddr, u32 tos) 1463 __be32 daddr, __be32 saddr, u32 tos)
1460{ 1464{
1461 struct rtable *rth = NULL;
1462 int err;
1463
1464#ifdef CONFIG_IP_ROUTE_MULTIPATH 1465#ifdef CONFIG_IP_ROUTE_MULTIPATH
1465 if (res->fi && res->fi->fib_nhs > 1) 1466 if (res->fi && res->fi->fib_nhs > 1)
1466 fib_select_multipath(res); 1467 fib_select_multipath(res);
1467#endif 1468#endif
1468 1469
1469 /* create a routing cache entry */ 1470 /* create a routing cache entry */
1470 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); 1471 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
1471 if (err)
1472 return err;
1473
1474 skb_dst_set(skb, &rth->dst);
1475 return 0;
1476} 1472}
1477 1473
1478/* 1474/*
@@ -1588,8 +1584,9 @@ local_input:
1588 if (!itag) { 1584 if (!itag) {
1589 rth = FIB_RES_NH(res).nh_rth_input; 1585 rth = FIB_RES_NH(res).nh_rth_input;
1590 if (rt_cache_valid(rth)) { 1586 if (rt_cache_valid(rth)) {
1591 dst_hold(&rth->dst); 1587 skb_dst_set_noref(skb, &rth->dst);
1592 goto set_and_out; 1588 err = 0;
1589 goto out;
1593 } 1590 }
1594 do_cache = true; 1591 do_cache = true;
1595 } 1592 }
@@ -1620,7 +1617,6 @@ local_input:
1620 } 1617 }
1621 if (do_cache) 1618 if (do_cache)
1622 rt_cache_route(&FIB_RES_NH(res), rth); 1619 rt_cache_route(&FIB_RES_NH(res), rth);
1623set_and_out:
1624 skb_dst_set(skb, &rth->dst); 1620 skb_dst_set(skb, &rth->dst);
1625 err = 0; 1621 err = 0;
1626 goto out; 1622 goto out;
@@ -1658,8 +1654,8 @@ martian_source_keep_err:
1658 goto out; 1654 goto out;
1659} 1655}
1660 1656
1661int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1657int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1662 u8 tos, struct net_device *dev) 1658 u8 tos, struct net_device *dev)
1663{ 1659{
1664 int res; 1660 int res;
1665 1661
@@ -1702,7 +1698,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1702 rcu_read_unlock(); 1698 rcu_read_unlock();
1703 return res; 1699 return res;
1704} 1700}
1705EXPORT_SYMBOL(ip_route_input); 1701EXPORT_SYMBOL(ip_route_input_noref);
1706 1702
1707/* called with rcu_read_lock() */ 1703/* called with rcu_read_lock() */
1708static struct rtable *__mkroute_output(const struct fib_result *res, 1704static struct rtable *__mkroute_output(const struct fib_result *res,
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 58d23a572509..06814b6216dc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
27 if (skb_dst(skb) == NULL) { 27 if (skb_dst(skb) == NULL) {
28 const struct iphdr *iph = ip_hdr(skb); 28 const struct iphdr *iph = ip_hdr(skb);
29 29
30 if (ip_route_input(skb, iph->daddr, iph->saddr, 30 if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
31 iph->tos, skb->dev)) 31 iph->tos, skb->dev))
32 goto drop; 32 goto drop;
33 } 33 }
34 return dst_input(skb); 34 return dst_input(skb);