aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-07-17 15:20:47 -0400
committer David S. Miller <davem@davemloft.net> 2012-07-20 16:36:16 -0400
commit f2bb4bedf35d5167a073dcdddf16543f351ef3ae (patch)
tree d68e90f5541302bfda64f044c5713e4f22aca599
parent ceb3320610d6f15ff20dd4c042b36473d77de76f (diff)
ipv4: Cache output routes in fib_info nexthops.
If we have an output route that lacks nexthop exceptions, we can cache it in the FIB info nexthop.

Such routes will have DST_HOST cleared because they refer to a family of destinations, rather than just one.

The sequence of the handling of exceptions during route lookup is adjusted to make the logic work properly.

Before we allocate the route, we look up the exception.

Then we know if we will cache this route or not, and therefore whether DST_HOST should be set on the allocated route.

Then we use DST_HOST to key off whether we should store the resulting route, during rt_set_nexthop(), in the FIB nexthop cache.

With help from Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip_fib.h 2
-rw-r--r-- net/ipv4/fib_semantics.c 2
-rw-r--r-- net/ipv4/route.c 140
3 files changed, 101 insertions, 43 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 2daf096dfc60..fb62c590360e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -46,6 +46,7 @@ struct fib_config {
46 }; 46 };
47 47
48struct fib_info; 48struct fib_info;
49struct rtable;
49 50
50struct fib_nh_exception { 51struct fib_nh_exception {
51 struct fib_nh_exception __rcu *fnhe_next; 52 struct fib_nh_exception __rcu *fnhe_next;
@@ -80,6 +81,7 @@ struct fib_nh {
80 __be32 nh_gw; 81 __be32 nh_gw;
81 __be32 nh_saddr; 82 __be32 nh_saddr;
82 int nh_saddr_genid; 83 int nh_saddr_genid;
84 struct rtable *nh_rth_output;
83 struct fnhe_hash_bucket *nh_exceptions; 85 struct fnhe_hash_bucket *nh_exceptions;
84}; 86};
85 87
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57d768240d..83d0f42b619a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,6 +171,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
171 dev_put(nexthop_nh->nh_dev); 171 dev_put(nexthop_nh->nh_dev);
172 if (nexthop_nh->nh_exceptions) 172 if (nexthop_nh->nh_exceptions)
173 free_nh_exceptions(nexthop_nh); 173 free_nh_exceptions(nexthop_nh);
174 if (nexthop_nh->nh_rth_output)
175 dst_release(&nexthop_nh->nh_rth_output->dst);
174 } endfor_nexthops(fi); 176 } endfor_nexthops(fi);
175 177
176 release_net(fi->fib_net); 178 release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d52f7699c2fa..8a0260010ea1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1158,8 +1158,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
1158 return mtu; 1158 return mtu;
1159} 1159}
1160 1160
1161static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, 1161static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
1162 struct fib_info *fi)
1163{ 1162{
1164 if (fi->fib_metrics != (u32 *) dst_default_metrics) { 1163 if (fi->fib_metrics != (u32 *) dst_default_metrics) {
1165 rt->fi = fi; 1164 rt->fi = fi;
@@ -1168,50 +1167,83 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1168 dst_init_metrics(&rt->dst, fi->fib_metrics, true); 1167 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1169} 1168}
1170 1169
1171static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) 1170static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1172{ 1171{
1173 struct fnhe_hash_bucket *hash = nh->nh_exceptions; 1172 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1174 struct fib_nh_exception *fnhe; 1173 struct fib_nh_exception *fnhe;
1175 u32 hval; 1174 u32 hval;
1176 1175
1176 if (!hash)
1177 return NULL;
1178
1177 hval = fnhe_hashfun(daddr); 1179 hval = fnhe_hashfun(daddr);
1178 1180
1179restart:
1180 for (fnhe = rcu_dereference(hash[hval].chain); fnhe; 1181 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1181 fnhe = rcu_dereference(fnhe->fnhe_next)) { 1182 fnhe = rcu_dereference(fnhe->fnhe_next)) {
1182 __be32 fnhe_daddr, gw; 1183 if (fnhe->fnhe_daddr == daddr)
1183 unsigned long expires; 1184 return fnhe;
1184 unsigned int seq; 1185 }
1185 u32 pmtu; 1186 return NULL;
1186 1187}
1187 seq = read_seqbegin(&fnhe_seqlock);
1188 fnhe_daddr = fnhe->fnhe_daddr;
1189 gw = fnhe->fnhe_gw;
1190 pmtu = fnhe->fnhe_pmtu;
1191 expires = fnhe->fnhe_expires;
1192 if (read_seqretry(&fnhe_seqlock, seq))
1193 goto restart;
1194 if (daddr != fnhe_daddr)
1195 continue;
1196 if (pmtu) {
1197 unsigned long diff = expires - jiffies;
1198 1188
1199 if (time_before(jiffies, expires)) { 1189static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1200 rt->rt_pmtu = pmtu; 1190 __be32 daddr)
1201 dst_set_expires(&rt->dst, diff); 1191{
1202 } 1192 __be32 fnhe_daddr, gw;
1203 } 1193 unsigned long expires;
1204 if (gw) { 1194 unsigned int seq;
1205 rt->rt_flags |= RTCF_REDIRECTED; 1195 u32 pmtu;
1206 rt->rt_gateway = gw; 1196
1197restart:
1198 seq = read_seqbegin(&fnhe_seqlock);
1199 fnhe_daddr = fnhe->fnhe_daddr;
1200 gw = fnhe->fnhe_gw;
1201 pmtu = fnhe->fnhe_pmtu;
1202 expires = fnhe->fnhe_expires;
1203 if (read_seqretry(&fnhe_seqlock, seq))
1204 goto restart;
1205
1206 if (daddr != fnhe_daddr)
1207 return;
1208
1209 if (pmtu) {
1210 unsigned long diff = expires - jiffies;
1211
1212 if (time_before(jiffies, expires)) {
1213 rt->rt_pmtu = pmtu;
1214 dst_set_expires(&rt->dst, diff);
1207 } 1215 }
1208 fnhe->fnhe_stamp = jiffies; 1216 }
1209 break; 1217 if (gw) {
1218 rt->rt_flags |= RTCF_REDIRECTED;
1219 rt->rt_gateway = gw;
1220 }
1221 fnhe->fnhe_stamp = jiffies;
1222}
1223
1224static inline void rt_release_rcu(struct rcu_head *head)
1225{
1226 struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
1227 dst_release(dst);
1228}
1229
1230static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1231{
1232 struct rtable *orig, *prev, **p = &nh->nh_rth_output;
1233
1234 orig = *p;
1235
1236 prev = cmpxchg(p, orig, rt);
1237 if (prev == orig) {
1238 dst_clone(&rt->dst);
1239 if (orig)
1240 call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu);
1210 } 1241 }
1211} 1242}
1212 1243
1213static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, 1244static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1214 const struct fib_result *res, 1245 const struct fib_result *res,
1246 struct fib_nh_exception *fnhe,
1215 struct fib_info *fi, u16 type, u32 itag) 1247 struct fib_info *fi, u16 type, u32 itag)
1216{ 1248{
1217 if (fi) { 1249 if (fi) {
@@ -1219,12 +1251,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
1219 1251
1220 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) 1252 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1221 rt->rt_gateway = nh->nh_gw; 1253 rt->rt_gateway = nh->nh_gw;
1222 if (unlikely(nh->nh_exceptions)) 1254 if (unlikely(fnhe))
1223 rt_bind_exception(rt, nh, fl4->daddr); 1255 rt_bind_exception(rt, fnhe, daddr);
1224 rt_init_metrics(rt, fl4, fi); 1256 rt_init_metrics(rt, fi);
1225#ifdef CONFIG_IP_ROUTE_CLASSID 1257#ifdef CONFIG_IP_ROUTE_CLASSID
1226 rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; 1258 rt->dst.tclassid = nh->nh_tclassid;
1227#endif 1259#endif
1260 if (!(rt->dst.flags & DST_HOST) &&
1261 rt_is_output_route(rt))
1262 rt_cache_route(nh, rt);
1228 } 1263 }
1229 1264
1230#ifdef CONFIG_IP_ROUTE_CLASSID 1265#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1236,10 +1271,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
1236} 1271}
1237 1272
1238static struct rtable *rt_dst_alloc(struct net_device *dev, 1273static struct rtable *rt_dst_alloc(struct net_device *dev,
1239 bool nopolicy, bool noxfrm) 1274 bool nopolicy, bool noxfrm, bool will_cache)
1240{ 1275{
1241 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 1276 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1242 DST_HOST | DST_NOCACHE | 1277 (will_cache ? 0 : DST_HOST) | DST_NOCACHE |
1243 (nopolicy ? DST_NOPOLICY : 0) | 1278 (nopolicy ? DST_NOPOLICY : 0) |
1244 (noxfrm ? DST_NOXFRM : 0)); 1279 (noxfrm ? DST_NOXFRM : 0));
1245} 1280}
@@ -1276,7 +1311,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1276 goto e_err; 1311 goto e_err;
1277 } 1312 }
1278 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, 1313 rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
1279 IN_DEV_CONF_GET(in_dev, NOPOLICY), false); 1314 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1280 if (!rth) 1315 if (!rth)
1281 goto e_nobufs; 1316 goto e_nobufs;
1282 1317
@@ -1349,6 +1384,7 @@ static int __mkroute_input(struct sk_buff *skb,
1349 __be32 daddr, __be32 saddr, u32 tos, 1384 __be32 daddr, __be32 saddr, u32 tos,
1350 struct rtable **result) 1385 struct rtable **result)
1351{ 1386{
1387 struct fib_nh_exception *fnhe;
1352 struct rtable *rth; 1388 struct rtable *rth;
1353 int err; 1389 int err;
1354 struct in_device *out_dev; 1390 struct in_device *out_dev;
@@ -1395,9 +1431,13 @@ static int __mkroute_input(struct sk_buff *skb,
1395 } 1431 }
1396 } 1432 }
1397 1433
1434 fnhe = NULL;
1435 if (res->fi)
1436 fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1437
1398 rth = rt_dst_alloc(out_dev->dev, 1438 rth = rt_dst_alloc(out_dev->dev,
1399 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1439 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1400 IN_DEV_CONF_GET(out_dev, NOXFRM)); 1440 IN_DEV_CONF_GET(out_dev, NOXFRM), false);
1401 if (!rth) { 1441 if (!rth) {
1402 err = -ENOBUFS; 1442 err = -ENOBUFS;
1403 goto cleanup; 1443 goto cleanup;
@@ -1416,7 +1456,7 @@ static int __mkroute_input(struct sk_buff *skb,
1416 rth->dst.input = ip_forward; 1456 rth->dst.input = ip_forward;
1417 rth->dst.output = ip_output; 1457 rth->dst.output = ip_output;
1418 1458
1419 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); 1459 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
1420 1460
1421 *result = rth; 1461 *result = rth;
1422 err = 0; 1462 err = 0;
@@ -1558,7 +1598,7 @@ brd_input:
1558 1598
1559local_input: 1599local_input:
1560 rth = rt_dst_alloc(net->loopback_dev, 1600 rth = rt_dst_alloc(net->loopback_dev,
1561 IN_DEV_CONF_GET(in_dev, NOPOLICY), false); 1601 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1562 if (!rth) 1602 if (!rth)
1563 goto e_nobufs; 1603 goto e_nobufs;
1564 1604
@@ -1672,6 +1712,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1672 unsigned int flags) 1712 unsigned int flags)
1673{ 1713{
1674 struct fib_info *fi = res->fi; 1714 struct fib_info *fi = res->fi;
1715 struct fib_nh_exception *fnhe;
1675 struct in_device *in_dev; 1716 struct in_device *in_dev;
1676 u16 type = res->type; 1717 u16 type = res->type;
1677 struct rtable *rth; 1718 struct rtable *rth;
@@ -1710,9 +1751,22 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1710 fi = NULL; 1751 fi = NULL;
1711 } 1752 }
1712 1753
1754 fnhe = NULL;
1755 if (fi) {
1756 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1757 if (!fnhe) {
1758 rth = FIB_RES_NH(*res).nh_rth_output;
1759 if (rth &&
1760 rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) {
1761 dst_use(&rth->dst, jiffies);
1762 return rth;
1763 }
1764 }
1765 }
1713 rth = rt_dst_alloc(dev_out, 1766 rth = rt_dst_alloc(dev_out,
1714 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1767 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1715 IN_DEV_CONF_GET(in_dev, NOXFRM)); 1768 IN_DEV_CONF_GET(in_dev, NOXFRM),
1769 fi && !fnhe);
1716 if (!rth) 1770 if (!rth)
1717 return ERR_PTR(-ENOBUFS); 1771 return ERR_PTR(-ENOBUFS);
1718 1772
@@ -1749,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1749#endif 1803#endif
1750 } 1804 }
1751 1805
1752 rt_set_nexthop(rth, fl4, res, fi, type, 0); 1806 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
1753 1807
1754 if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) 1808 if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
1755 rth->dst.flags |= DST_NOCACHE; 1809 rth->dst.flags |= DST_NOCACHE;