diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 183 |
1 files changed, 131 insertions, 52 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc1a81ca79a7..c035251beb07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | |||
147 | struct sk_buff *skb, u32 mtu); | 147 | struct sk_buff *skb, u32 mtu); |
148 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, | 148 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, |
149 | struct sk_buff *skb); | 149 | struct sk_buff *skb); |
150 | static void ipv4_dst_destroy(struct dst_entry *dst); | ||
150 | 151 | ||
151 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | 152 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, |
152 | int how) | 153 | int how) |
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
170 | .default_advmss = ipv4_default_advmss, | 171 | .default_advmss = ipv4_default_advmss, |
171 | .mtu = ipv4_mtu, | 172 | .mtu = ipv4_mtu, |
172 | .cow_metrics = ipv4_cow_metrics, | 173 | .cow_metrics = ipv4_cow_metrics, |
174 | .destroy = ipv4_dst_destroy, | ||
173 | .ifdown = ipv4_dst_ifdown, | 175 | .ifdown = ipv4_dst_ifdown, |
174 | .negative_advice = ipv4_negative_advice, | 176 | .negative_advice = ipv4_negative_advice, |
175 | .link_failure = ipv4_link_failure, | 177 | .link_failure = ipv4_link_failure, |
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, | |||
587 | build_sk_flow_key(fl4, sk); | 589 | build_sk_flow_key(fl4, sk); |
588 | } | 590 | } |
589 | 591 | ||
590 | static DEFINE_SEQLOCK(fnhe_seqlock); | 592 | static inline void rt_free(struct rtable *rt) |
593 | { | ||
594 | call_rcu(&rt->dst.rcu_head, dst_rcu_free); | ||
595 | } | ||
596 | |||
597 | static DEFINE_SPINLOCK(fnhe_lock); | ||
591 | 598 | ||
592 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | 599 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) |
593 | { | 600 | { |
594 | struct fib_nh_exception *fnhe, *oldest; | 601 | struct fib_nh_exception *fnhe, *oldest; |
602 | struct rtable *orig; | ||
595 | 603 | ||
596 | oldest = rcu_dereference(hash->chain); | 604 | oldest = rcu_dereference(hash->chain); |
597 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; | 605 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; |
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | |||
599 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) | 607 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) |
600 | oldest = fnhe; | 608 | oldest = fnhe; |
601 | } | 609 | } |
610 | orig = rcu_dereference(oldest->fnhe_rth); | ||
611 | if (orig) { | ||
612 | RCU_INIT_POINTER(oldest->fnhe_rth, NULL); | ||
613 | rt_free(orig); | ||
614 | } | ||
602 | return oldest; | 615 | return oldest; |
603 | } | 616 | } |
604 | 617 | ||
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
620 | int depth; | 633 | int depth; |
621 | u32 hval = fnhe_hashfun(daddr); | 634 | u32 hval = fnhe_hashfun(daddr); |
622 | 635 | ||
623 | write_seqlock_bh(&fnhe_seqlock); | 636 | spin_lock_bh(&fnhe_lock); |
624 | 637 | ||
625 | hash = nh->nh_exceptions; | 638 | hash = nh->nh_exceptions; |
626 | if (!hash) { | 639 | if (!hash) { |
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
667 | fnhe->fnhe_stamp = jiffies; | 680 | fnhe->fnhe_stamp = jiffies; |
668 | 681 | ||
669 | out_unlock: | 682 | out_unlock: |
670 | write_sequnlock_bh(&fnhe_seqlock); | 683 | spin_unlock_bh(&fnhe_lock); |
671 | return; | 684 | return; |
672 | } | 685 | } |
673 | 686 | ||
@@ -1164,53 +1177,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) | |||
1164 | return NULL; | 1177 | return NULL; |
1165 | } | 1178 | } |
1166 | 1179 | ||
1167 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | 1180 | static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
1168 | __be32 daddr) | 1181 | __be32 daddr) |
1169 | { | 1182 | { |
1170 | __be32 fnhe_daddr, gw; | 1183 | bool ret = false; |
1171 | unsigned long expires; | 1184 | |
1172 | unsigned int seq; | 1185 | spin_lock_bh(&fnhe_lock); |
1173 | u32 pmtu; | ||
1174 | |||
1175 | restart: | ||
1176 | seq = read_seqbegin(&fnhe_seqlock); | ||
1177 | fnhe_daddr = fnhe->fnhe_daddr; | ||
1178 | gw = fnhe->fnhe_gw; | ||
1179 | pmtu = fnhe->fnhe_pmtu; | ||
1180 | expires = fnhe->fnhe_expires; | ||
1181 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1182 | goto restart; | ||
1183 | |||
1184 | if (daddr != fnhe_daddr) | ||
1185 | return; | ||
1186 | 1186 | ||
1187 | if (pmtu) { | 1187 | if (daddr == fnhe->fnhe_daddr) { |
1188 | unsigned long diff = expires - jiffies; | 1188 | struct rtable *orig; |
1189 | 1189 | ||
1190 | if (time_before(jiffies, expires)) { | 1190 | if (fnhe->fnhe_pmtu) { |
1191 | rt->rt_pmtu = pmtu; | 1191 | unsigned long expires = fnhe->fnhe_expires; |
1192 | dst_set_expires(&rt->dst, diff); | 1192 | unsigned long diff = expires - jiffies; |
1193 | |||
1194 | if (time_before(jiffies, expires)) { | ||
1195 | rt->rt_pmtu = fnhe->fnhe_pmtu; | ||
1196 | dst_set_expires(&rt->dst, diff); | ||
1197 | } | ||
1193 | } | 1198 | } |
1199 | if (fnhe->fnhe_gw) { | ||
1200 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1201 | rt->rt_gateway = fnhe->fnhe_gw; | ||
1202 | } | ||
1203 | |||
1204 | orig = rcu_dereference(fnhe->fnhe_rth); | ||
1205 | rcu_assign_pointer(fnhe->fnhe_rth, rt); | ||
1206 | if (orig) | ||
1207 | rt_free(orig); | ||
1208 | |||
1209 | fnhe->fnhe_stamp = jiffies; | ||
1210 | ret = true; | ||
1211 | } else { | ||
1212 | /* Routes we intend to cache in nexthop exception have | ||
1213 | * the DST_NOCACHE bit clear. However, if we are | ||
1214 | * unsuccessful at storing this route into the cache | ||
1215 | * we really need to set it. | ||
1216 | */ | ||
1217 | rt->dst.flags |= DST_NOCACHE; | ||
1194 | } | 1218 | } |
1195 | if (gw) { | 1219 | spin_unlock_bh(&fnhe_lock); |
1196 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1197 | rt->rt_gateway = gw; | ||
1198 | } | ||
1199 | fnhe->fnhe_stamp = jiffies; | ||
1200 | } | ||
1201 | 1220 | ||
1202 | static inline void rt_free(struct rtable *rt) | 1221 | return ret; |
1203 | { | ||
1204 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); | ||
1205 | } | 1222 | } |
1206 | 1223 | ||
1207 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | 1224 | static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) |
1208 | { | 1225 | { |
1209 | struct rtable *orig, *prev, **p = &nh->nh_rth_output; | 1226 | struct rtable *orig, *prev, **p; |
1210 | 1227 | bool ret = true; | |
1211 | if (rt_is_input_route(rt)) | ||
1212 | p = &nh->nh_rth_input; | ||
1213 | 1228 | ||
1229 | if (rt_is_input_route(rt)) { | ||
1230 | p = (struct rtable **)&nh->nh_rth_input; | ||
1231 | } else { | ||
1232 | if (!nh->nh_pcpu_rth_output) | ||
1233 | goto nocache; | ||
1234 | p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); | ||
1235 | } | ||
1214 | orig = *p; | 1236 | orig = *p; |
1215 | 1237 | ||
1216 | prev = cmpxchg(p, orig, rt); | 1238 | prev = cmpxchg(p, orig, rt); |
@@ -1223,7 +1245,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | |||
1223 | * unsuccessful at storing this route into the cache | 1245 | * unsuccessful at storing this route into the cache |
1224 | * we really need to set it. | 1246 | * we really need to set it. |
1225 | */ | 1247 | */ |
1248 | nocache: | ||
1226 | rt->dst.flags |= DST_NOCACHE; | 1249 | rt->dst.flags |= DST_NOCACHE; |
1250 | ret = false; | ||
1251 | } | ||
1252 | |||
1253 | return ret; | ||
1254 | } | ||
1255 | |||
1256 | static DEFINE_SPINLOCK(rt_uncached_lock); | ||
1257 | static LIST_HEAD(rt_uncached_list); | ||
1258 | |||
1259 | static void rt_add_uncached_list(struct rtable *rt) | ||
1260 | { | ||
1261 | spin_lock_bh(&rt_uncached_lock); | ||
1262 | list_add_tail(&rt->rt_uncached, &rt_uncached_list); | ||
1263 | spin_unlock_bh(&rt_uncached_lock); | ||
1264 | } | ||
1265 | |||
1266 | static void ipv4_dst_destroy(struct dst_entry *dst) | ||
1267 | { | ||
1268 | struct rtable *rt = (struct rtable *) dst; | ||
1269 | |||
1270 | if (dst->flags & DST_NOCACHE) { | ||
1271 | spin_lock_bh(&rt_uncached_lock); | ||
1272 | list_del(&rt->rt_uncached); | ||
1273 | spin_unlock_bh(&rt_uncached_lock); | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | void rt_flush_dev(struct net_device *dev) | ||
1278 | { | ||
1279 | if (!list_empty(&rt_uncached_list)) { | ||
1280 | struct net *net = dev_net(dev); | ||
1281 | struct rtable *rt; | ||
1282 | |||
1283 | spin_lock_bh(&rt_uncached_lock); | ||
1284 | list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { | ||
1285 | if (rt->dst.dev != dev) | ||
1286 | continue; | ||
1287 | rt->dst.dev = net->loopback_dev; | ||
1288 | dev_hold(rt->dst.dev); | ||
1289 | dev_put(dev); | ||
1290 | } | ||
1291 | spin_unlock_bh(&rt_uncached_lock); | ||
1227 | } | 1292 | } |
1228 | } | 1293 | } |
1229 | 1294 | ||
@@ -1239,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1239 | struct fib_nh_exception *fnhe, | 1304 | struct fib_nh_exception *fnhe, |
1240 | struct fib_info *fi, u16 type, u32 itag) | 1305 | struct fib_info *fi, u16 type, u32 itag) |
1241 | { | 1306 | { |
1307 | bool cached = false; | ||
1308 | |||
1242 | if (fi) { | 1309 | if (fi) { |
1243 | struct fib_nh *nh = &FIB_RES_NH(*res); | 1310 | struct fib_nh *nh = &FIB_RES_NH(*res); |
1244 | 1311 | ||
1245 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | 1312 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) |
1246 | rt->rt_gateway = nh->nh_gw; | 1313 | rt->rt_gateway = nh->nh_gw; |
1247 | if (unlikely(fnhe)) | ||
1248 | rt_bind_exception(rt, fnhe, daddr); | ||
1249 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | 1314 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); |
1250 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1315 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1251 | rt->dst.tclassid = nh->nh_tclassid; | 1316 | rt->dst.tclassid = nh->nh_tclassid; |
1252 | #endif | 1317 | #endif |
1253 | if (!(rt->dst.flags & DST_NOCACHE)) | 1318 | if (unlikely(fnhe)) |
1254 | rt_cache_route(nh, rt); | 1319 | cached = rt_bind_exception(rt, fnhe, daddr); |
1320 | else if (!(rt->dst.flags & DST_NOCACHE)) | ||
1321 | cached = rt_cache_route(nh, rt); | ||
1255 | } | 1322 | } |
1323 | if (unlikely(!cached)) | ||
1324 | rt_add_uncached_list(rt); | ||
1256 | 1325 | ||
1257 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1326 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1258 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1327 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
@@ -1319,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1319 | rth->rt_iif = 0; | 1388 | rth->rt_iif = 0; |
1320 | rth->rt_pmtu = 0; | 1389 | rth->rt_pmtu = 0; |
1321 | rth->rt_gateway = 0; | 1390 | rth->rt_gateway = 0; |
1391 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1322 | if (our) { | 1392 | if (our) { |
1323 | rth->dst.input= ip_local_deliver; | 1393 | rth->dst.input= ip_local_deliver; |
1324 | rth->rt_flags |= RTCF_LOCAL; | 1394 | rth->rt_flags |= RTCF_LOCAL; |
@@ -1420,7 +1490,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1420 | do_cache = false; | 1490 | do_cache = false; |
1421 | if (res->fi) { | 1491 | if (res->fi) { |
1422 | if (!itag) { | 1492 | if (!itag) { |
1423 | rth = FIB_RES_NH(*res).nh_rth_input; | 1493 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); |
1424 | if (rt_cache_valid(rth)) { | 1494 | if (rt_cache_valid(rth)) { |
1425 | skb_dst_set_noref(skb, &rth->dst); | 1495 | skb_dst_set_noref(skb, &rth->dst); |
1426 | goto out; | 1496 | goto out; |
@@ -1444,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1444 | rth->rt_iif = 0; | 1514 | rth->rt_iif = 0; |
1445 | rth->rt_pmtu = 0; | 1515 | rth->rt_pmtu = 0; |
1446 | rth->rt_gateway = 0; | 1516 | rth->rt_gateway = 0; |
1517 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1447 | 1518 | ||
1448 | rth->dst.input = ip_forward; | 1519 | rth->dst.input = ip_forward; |
1449 | rth->dst.output = ip_output; | 1520 | rth->dst.output = ip_output; |
@@ -1582,7 +1653,7 @@ local_input: | |||
1582 | do_cache = false; | 1653 | do_cache = false; |
1583 | if (res.fi) { | 1654 | if (res.fi) { |
1584 | if (!itag) { | 1655 | if (!itag) { |
1585 | rth = FIB_RES_NH(res).nh_rth_input; | 1656 | rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); |
1586 | if (rt_cache_valid(rth)) { | 1657 | if (rt_cache_valid(rth)) { |
1587 | skb_dst_set_noref(skb, &rth->dst); | 1658 | skb_dst_set_noref(skb, &rth->dst); |
1588 | err = 0; | 1659 | err = 0; |
@@ -1610,6 +1681,7 @@ local_input: | |||
1610 | rth->rt_iif = 0; | 1681 | rth->rt_iif = 0; |
1611 | rth->rt_pmtu = 0; | 1682 | rth->rt_pmtu = 0; |
1612 | rth->rt_gateway = 0; | 1683 | rth->rt_gateway = 0; |
1684 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1613 | if (res.type == RTN_UNREACHABLE) { | 1685 | if (res.type == RTN_UNREACHABLE) { |
1614 | rth->dst.input= ip_error; | 1686 | rth->dst.input= ip_error; |
1615 | rth->dst.error= -err; | 1687 | rth->dst.error= -err; |
@@ -1748,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1748 | 1820 | ||
1749 | fnhe = NULL; | 1821 | fnhe = NULL; |
1750 | if (fi) { | 1822 | if (fi) { |
1823 | struct rtable __rcu **prth; | ||
1824 | |||
1751 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | 1825 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); |
1752 | if (!fnhe) { | 1826 | if (fnhe) |
1753 | rth = FIB_RES_NH(*res).nh_rth_output; | 1827 | prth = &fnhe->fnhe_rth; |
1754 | if (rt_cache_valid(rth)) { | 1828 | else |
1755 | dst_hold(&rth->dst); | 1829 | prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); |
1756 | return rth; | 1830 | rth = rcu_dereference(*prth); |
1757 | } | 1831 | if (rt_cache_valid(rth)) { |
1832 | dst_hold(&rth->dst); | ||
1833 | return rth; | ||
1758 | } | 1834 | } |
1759 | } | 1835 | } |
1760 | rth = rt_dst_alloc(dev_out, | 1836 | rth = rt_dst_alloc(dev_out, |
1761 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1837 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
1762 | IN_DEV_CONF_GET(in_dev, NOXFRM), | 1838 | IN_DEV_CONF_GET(in_dev, NOXFRM), |
1763 | fi && !fnhe); | 1839 | fi); |
1764 | if (!rth) | 1840 | if (!rth) |
1765 | return ERR_PTR(-ENOBUFS); | 1841 | return ERR_PTR(-ENOBUFS); |
1766 | 1842 | ||
@@ -1773,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1773 | rth->rt_iif = orig_oif ? : 0; | 1849 | rth->rt_iif = orig_oif ? : 0; |
1774 | rth->rt_pmtu = 0; | 1850 | rth->rt_pmtu = 0; |
1775 | rth->rt_gateway = 0; | 1851 | rth->rt_gateway = 0; |
1852 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1776 | 1853 | ||
1777 | RT_CACHE_STAT_INC(out_slow_tot); | 1854 | RT_CACHE_STAT_INC(out_slow_tot); |
1778 | 1855 | ||
@@ -2052,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2052 | rt->rt_type = ort->rt_type; | 2129 | rt->rt_type = ort->rt_type; |
2053 | rt->rt_gateway = ort->rt_gateway; | 2130 | rt->rt_gateway = ort->rt_gateway; |
2054 | 2131 | ||
2132 | INIT_LIST_HEAD(&rt->rt_uncached); | ||
2133 | |||
2055 | dst_free(new); | 2134 | dst_free(new); |
2056 | } | 2135 | } |
2057 | 2136 | ||