diff options
-rw-r--r-- | include/net/ip_fib.h | 2 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 140 |
3 files changed, 101 insertions, 43 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2daf096dfc60..fb62c590360e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -46,6 +46,7 @@ struct fib_config { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | struct fib_info; | 48 | struct fib_info; |
49 | struct rtable; | ||
49 | 50 | ||
50 | struct fib_nh_exception { | 51 | struct fib_nh_exception { |
51 | struct fib_nh_exception __rcu *fnhe_next; | 52 | struct fib_nh_exception __rcu *fnhe_next; |
@@ -80,6 +81,7 @@ struct fib_nh { | |||
80 | __be32 nh_gw; | 81 | __be32 nh_gw; |
81 | __be32 nh_saddr; | 82 | __be32 nh_saddr; |
82 | int nh_saddr_genid; | 83 | int nh_saddr_genid; |
84 | struct rtable *nh_rth_output; | ||
83 | struct fnhe_hash_bucket *nh_exceptions; | 85 | struct fnhe_hash_bucket *nh_exceptions; |
84 | }; | 86 | }; |
85 | 87 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2b57d768240d..83d0f42b619a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -171,6 +171,8 @@ static void free_fib_info_rcu(struct rcu_head *head) | |||
171 | dev_put(nexthop_nh->nh_dev); | 171 | dev_put(nexthop_nh->nh_dev); |
172 | if (nexthop_nh->nh_exceptions) | 172 | if (nexthop_nh->nh_exceptions) |
173 | free_nh_exceptions(nexthop_nh); | 173 | free_nh_exceptions(nexthop_nh); |
174 | if (nexthop_nh->nh_rth_output) | ||
175 | dst_release(&nexthop_nh->nh_rth_output->dst); | ||
174 | } endfor_nexthops(fi); | 176 | } endfor_nexthops(fi); |
175 | 177 | ||
176 | release_net(fi->fib_net); | 178 | release_net(fi->fib_net); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d52f7699c2fa..8a0260010ea1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1158,8 +1158,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1158 | return mtu; | 1158 | return mtu; |
1159 | } | 1159 | } |
1160 | 1160 | ||
1161 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | 1161 | static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) |
1162 | struct fib_info *fi) | ||
1163 | { | 1162 | { |
1164 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | 1163 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { |
1165 | rt->fi = fi; | 1164 | rt->fi = fi; |
@@ -1168,50 +1167,83 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | |||
1168 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | 1167 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); |
1169 | } | 1168 | } |
1170 | 1169 | ||
1171 | static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) | 1170 | static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) |
1172 | { | 1171 | { |
1173 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; | 1172 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; |
1174 | struct fib_nh_exception *fnhe; | 1173 | struct fib_nh_exception *fnhe; |
1175 | u32 hval; | 1174 | u32 hval; |
1176 | 1175 | ||
1176 | if (!hash) | ||
1177 | return NULL; | ||
1178 | |||
1177 | hval = fnhe_hashfun(daddr); | 1179 | hval = fnhe_hashfun(daddr); |
1178 | 1180 | ||
1179 | restart: | ||
1180 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; | 1181 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; |
1181 | fnhe = rcu_dereference(fnhe->fnhe_next)) { | 1182 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
1182 | __be32 fnhe_daddr, gw; | 1183 | if (fnhe->fnhe_daddr == daddr) |
1183 | unsigned long expires; | 1184 | return fnhe; |
1184 | unsigned int seq; | 1185 | } |
1185 | u32 pmtu; | 1186 | return NULL; |
1186 | 1187 | } | |
1187 | seq = read_seqbegin(&fnhe_seqlock); | ||
1188 | fnhe_daddr = fnhe->fnhe_daddr; | ||
1189 | gw = fnhe->fnhe_gw; | ||
1190 | pmtu = fnhe->fnhe_pmtu; | ||
1191 | expires = fnhe->fnhe_expires; | ||
1192 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1193 | goto restart; | ||
1194 | if (daddr != fnhe_daddr) | ||
1195 | continue; | ||
1196 | if (pmtu) { | ||
1197 | unsigned long diff = expires - jiffies; | ||
1198 | 1188 | ||
1199 | if (time_before(jiffies, expires)) { | 1189 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
1200 | rt->rt_pmtu = pmtu; | 1190 | __be32 daddr) |
1201 | dst_set_expires(&rt->dst, diff); | 1191 | { |
1202 | } | 1192 | __be32 fnhe_daddr, gw; |
1203 | } | 1193 | unsigned long expires; |
1204 | if (gw) { | 1194 | unsigned int seq; |
1205 | rt->rt_flags |= RTCF_REDIRECTED; | 1195 | u32 pmtu; |
1206 | rt->rt_gateway = gw; | 1196 | |
1197 | restart: | ||
1198 | seq = read_seqbegin(&fnhe_seqlock); | ||
1199 | fnhe_daddr = fnhe->fnhe_daddr; | ||
1200 | gw = fnhe->fnhe_gw; | ||
1201 | pmtu = fnhe->fnhe_pmtu; | ||
1202 | expires = fnhe->fnhe_expires; | ||
1203 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1204 | goto restart; | ||
1205 | |||
1206 | if (daddr != fnhe_daddr) | ||
1207 | return; | ||
1208 | |||
1209 | if (pmtu) { | ||
1210 | unsigned long diff = expires - jiffies; | ||
1211 | |||
1212 | if (time_before(jiffies, expires)) { | ||
1213 | rt->rt_pmtu = pmtu; | ||
1214 | dst_set_expires(&rt->dst, diff); | ||
1207 | } | 1215 | } |
1208 | fnhe->fnhe_stamp = jiffies; | 1216 | } |
1209 | break; | 1217 | if (gw) { |
1218 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1219 | rt->rt_gateway = gw; | ||
1220 | } | ||
1221 | fnhe->fnhe_stamp = jiffies; | ||
1222 | } | ||
1223 | |||
1224 | static inline void rt_release_rcu(struct rcu_head *head) | ||
1225 | { | ||
1226 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); | ||
1227 | dst_release(dst); | ||
1228 | } | ||
1229 | |||
1230 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | ||
1231 | { | ||
1232 | struct rtable *orig, *prev, **p = &nh->nh_rth_output; | ||
1233 | |||
1234 | orig = *p; | ||
1235 | |||
1236 | prev = cmpxchg(p, orig, rt); | ||
1237 | if (prev == orig) { | ||
1238 | dst_clone(&rt->dst); | ||
1239 | if (orig) | ||
1240 | call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); | ||
1210 | } | 1241 | } |
1211 | } | 1242 | } |
1212 | 1243 | ||
1213 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | 1244 | static void rt_set_nexthop(struct rtable *rt, __be32 daddr, |
1214 | const struct fib_result *res, | 1245 | const struct fib_result *res, |
1246 | struct fib_nh_exception *fnhe, | ||
1215 | struct fib_info *fi, u16 type, u32 itag) | 1247 | struct fib_info *fi, u16 type, u32 itag) |
1216 | { | 1248 | { |
1217 | if (fi) { | 1249 | if (fi) { |
@@ -1219,12 +1251,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | |||
1219 | 1251 | ||
1220 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | 1252 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) |
1221 | rt->rt_gateway = nh->nh_gw; | 1253 | rt->rt_gateway = nh->nh_gw; |
1222 | if (unlikely(nh->nh_exceptions)) | 1254 | if (unlikely(fnhe)) |
1223 | rt_bind_exception(rt, nh, fl4->daddr); | 1255 | rt_bind_exception(rt, fnhe, daddr); |
1224 | rt_init_metrics(rt, fl4, fi); | 1256 | rt_init_metrics(rt, fi); |
1225 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1257 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1226 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | 1258 | rt->dst.tclassid = nh->nh_tclassid; |
1227 | #endif | 1259 | #endif |
1260 | if (!(rt->dst.flags & DST_HOST) && | ||
1261 | rt_is_output_route(rt)) | ||
1262 | rt_cache_route(nh, rt); | ||
1228 | } | 1263 | } |
1229 | 1264 | ||
1230 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1265 | #ifdef CONFIG_IP_ROUTE_CLASSID |
@@ -1236,10 +1271,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | |||
1236 | } | 1271 | } |
1237 | 1272 | ||
1238 | static struct rtable *rt_dst_alloc(struct net_device *dev, | 1273 | static struct rtable *rt_dst_alloc(struct net_device *dev, |
1239 | bool nopolicy, bool noxfrm) | 1274 | bool nopolicy, bool noxfrm, bool will_cache) |
1240 | { | 1275 | { |
1241 | return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, | 1276 | return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, |
1242 | DST_HOST | DST_NOCACHE | | 1277 | (will_cache ? 0 : DST_HOST) | DST_NOCACHE | |
1243 | (nopolicy ? DST_NOPOLICY : 0) | | 1278 | (nopolicy ? DST_NOPOLICY : 0) | |
1244 | (noxfrm ? DST_NOXFRM : 0)); | 1279 | (noxfrm ? DST_NOXFRM : 0)); |
1245 | } | 1280 | } |
@@ -1276,7 +1311,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1276 | goto e_err; | 1311 | goto e_err; |
1277 | } | 1312 | } |
1278 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, | 1313 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, |
1279 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 1314 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); |
1280 | if (!rth) | 1315 | if (!rth) |
1281 | goto e_nobufs; | 1316 | goto e_nobufs; |
1282 | 1317 | ||
@@ -1349,6 +1384,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1349 | __be32 daddr, __be32 saddr, u32 tos, | 1384 | __be32 daddr, __be32 saddr, u32 tos, |
1350 | struct rtable **result) | 1385 | struct rtable **result) |
1351 | { | 1386 | { |
1387 | struct fib_nh_exception *fnhe; | ||
1352 | struct rtable *rth; | 1388 | struct rtable *rth; |
1353 | int err; | 1389 | int err; |
1354 | struct in_device *out_dev; | 1390 | struct in_device *out_dev; |
@@ -1395,9 +1431,13 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1395 | } | 1431 | } |
1396 | } | 1432 | } |
1397 | 1433 | ||
1434 | fnhe = NULL; | ||
1435 | if (res->fi) | ||
1436 | fnhe = find_exception(&FIB_RES_NH(*res), daddr); | ||
1437 | |||
1398 | rth = rt_dst_alloc(out_dev->dev, | 1438 | rth = rt_dst_alloc(out_dev->dev, |
1399 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1439 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
1400 | IN_DEV_CONF_GET(out_dev, NOXFRM)); | 1440 | IN_DEV_CONF_GET(out_dev, NOXFRM), false); |
1401 | if (!rth) { | 1441 | if (!rth) { |
1402 | err = -ENOBUFS; | 1442 | err = -ENOBUFS; |
1403 | goto cleanup; | 1443 | goto cleanup; |
@@ -1416,7 +1456,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1416 | rth->dst.input = ip_forward; | 1456 | rth->dst.input = ip_forward; |
1417 | rth->dst.output = ip_output; | 1457 | rth->dst.output = ip_output; |
1418 | 1458 | ||
1419 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); | 1459 | rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); |
1420 | 1460 | ||
1421 | *result = rth; | 1461 | *result = rth; |
1422 | err = 0; | 1462 | err = 0; |
@@ -1558,7 +1598,7 @@ brd_input: | |||
1558 | 1598 | ||
1559 | local_input: | 1599 | local_input: |
1560 | rth = rt_dst_alloc(net->loopback_dev, | 1600 | rth = rt_dst_alloc(net->loopback_dev, |
1561 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 1601 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); |
1562 | if (!rth) | 1602 | if (!rth) |
1563 | goto e_nobufs; | 1603 | goto e_nobufs; |
1564 | 1604 | ||
@@ -1672,6 +1712,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1672 | unsigned int flags) | 1712 | unsigned int flags) |
1673 | { | 1713 | { |
1674 | struct fib_info *fi = res->fi; | 1714 | struct fib_info *fi = res->fi; |
1715 | struct fib_nh_exception *fnhe; | ||
1675 | struct in_device *in_dev; | 1716 | struct in_device *in_dev; |
1676 | u16 type = res->type; | 1717 | u16 type = res->type; |
1677 | struct rtable *rth; | 1718 | struct rtable *rth; |
@@ -1710,9 +1751,22 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1710 | fi = NULL; | 1751 | fi = NULL; |
1711 | } | 1752 | } |
1712 | 1753 | ||
1754 | fnhe = NULL; | ||
1755 | if (fi) { | ||
1756 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | ||
1757 | if (!fnhe) { | ||
1758 | rth = FIB_RES_NH(*res).nh_rth_output; | ||
1759 | if (rth && | ||
1760 | rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) { | ||
1761 | dst_use(&rth->dst, jiffies); | ||
1762 | return rth; | ||
1763 | } | ||
1764 | } | ||
1765 | } | ||
1713 | rth = rt_dst_alloc(dev_out, | 1766 | rth = rt_dst_alloc(dev_out, |
1714 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1767 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
1715 | IN_DEV_CONF_GET(in_dev, NOXFRM)); | 1768 | IN_DEV_CONF_GET(in_dev, NOXFRM), |
1769 | fi && !fnhe); | ||
1716 | if (!rth) | 1770 | if (!rth) |
1717 | return ERR_PTR(-ENOBUFS); | 1771 | return ERR_PTR(-ENOBUFS); |
1718 | 1772 | ||
@@ -1749,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1749 | #endif | 1803 | #endif |
1750 | } | 1804 | } |
1751 | 1805 | ||
1752 | rt_set_nexthop(rth, fl4, res, fi, type, 0); | 1806 | rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); |
1753 | 1807 | ||
1754 | if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) | 1808 | if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) |
1755 | rth->dst.flags |= DST_NOCACHE; | 1809 | rth->dst.flags |= DST_NOCACHE; |