aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c183
1 files changed, 131 insertions, 52 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc1a81ca79a7..c035251beb07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
147 struct sk_buff *skb, u32 mtu); 147 struct sk_buff *skb, u32 mtu);
148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
149 struct sk_buff *skb); 149 struct sk_buff *skb);
150static void ipv4_dst_destroy(struct dst_entry *dst);
150 151
151static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 152static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
152 int how) 153 int how)
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
170 .default_advmss = ipv4_default_advmss, 171 .default_advmss = ipv4_default_advmss,
171 .mtu = ipv4_mtu, 172 .mtu = ipv4_mtu,
172 .cow_metrics = ipv4_cow_metrics, 173 .cow_metrics = ipv4_cow_metrics,
174 .destroy = ipv4_dst_destroy,
173 .ifdown = ipv4_dst_ifdown, 175 .ifdown = ipv4_dst_ifdown,
174 .negative_advice = ipv4_negative_advice, 176 .negative_advice = ipv4_negative_advice,
175 .link_failure = ipv4_link_failure, 177 .link_failure = ipv4_link_failure,
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
587 build_sk_flow_key(fl4, sk); 589 build_sk_flow_key(fl4, sk);
588} 590}
589 591
590static DEFINE_SEQLOCK(fnhe_seqlock); 592static inline void rt_free(struct rtable *rt)
593{
594 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
595}
596
597static DEFINE_SPINLOCK(fnhe_lock);
591 598
592static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 599static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
593{ 600{
594 struct fib_nh_exception *fnhe, *oldest; 601 struct fib_nh_exception *fnhe, *oldest;
602 struct rtable *orig;
595 603
596 oldest = rcu_dereference(hash->chain); 604 oldest = rcu_dereference(hash->chain);
597 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 605 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
599 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 607 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
600 oldest = fnhe; 608 oldest = fnhe;
601 } 609 }
610 orig = rcu_dereference(oldest->fnhe_rth);
611 if (orig) {
612 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
613 rt_free(orig);
614 }
602 return oldest; 615 return oldest;
603} 616}
604 617
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
620 int depth; 633 int depth;
621 u32 hval = fnhe_hashfun(daddr); 634 u32 hval = fnhe_hashfun(daddr);
622 635
623 write_seqlock_bh(&fnhe_seqlock); 636 spin_lock_bh(&fnhe_lock);
624 637
625 hash = nh->nh_exceptions; 638 hash = nh->nh_exceptions;
626 if (!hash) { 639 if (!hash) {
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
667 fnhe->fnhe_stamp = jiffies; 680 fnhe->fnhe_stamp = jiffies;
668 681
669out_unlock: 682out_unlock:
670 write_sequnlock_bh(&fnhe_seqlock); 683 spin_unlock_bh(&fnhe_lock);
671 return; 684 return;
672} 685}
673 686
@@ -1164,53 +1177,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1164 return NULL; 1177 return NULL;
1165} 1178}
1166 1179
1167static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1180static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1168 __be32 daddr) 1181 __be32 daddr)
1169{ 1182{
1170 __be32 fnhe_daddr, gw; 1183 bool ret = false;
1171 unsigned long expires; 1184
1172 unsigned int seq; 1185 spin_lock_bh(&fnhe_lock);
1173 u32 pmtu;
1174
1175restart:
1176 seq = read_seqbegin(&fnhe_seqlock);
1177 fnhe_daddr = fnhe->fnhe_daddr;
1178 gw = fnhe->fnhe_gw;
1179 pmtu = fnhe->fnhe_pmtu;
1180 expires = fnhe->fnhe_expires;
1181 if (read_seqretry(&fnhe_seqlock, seq))
1182 goto restart;
1183
1184 if (daddr != fnhe_daddr)
1185 return;
1186 1186
1187 if (pmtu) { 1187 if (daddr == fnhe->fnhe_daddr) {
1188 unsigned long diff = expires - jiffies; 1188 struct rtable *orig;
1189 1189
1190 if (time_before(jiffies, expires)) { 1190 if (fnhe->fnhe_pmtu) {
1191 rt->rt_pmtu = pmtu; 1191 unsigned long expires = fnhe->fnhe_expires;
1192 dst_set_expires(&rt->dst, diff); 1192 unsigned long diff = expires - jiffies;
1193
1194 if (time_before(jiffies, expires)) {
1195 rt->rt_pmtu = fnhe->fnhe_pmtu;
1196 dst_set_expires(&rt->dst, diff);
1197 }
1193 } 1198 }
1199 if (fnhe->fnhe_gw) {
1200 rt->rt_flags |= RTCF_REDIRECTED;
1201 rt->rt_gateway = fnhe->fnhe_gw;
1202 }
1203
1204 orig = rcu_dereference(fnhe->fnhe_rth);
1205 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1206 if (orig)
1207 rt_free(orig);
1208
1209 fnhe->fnhe_stamp = jiffies;
1210 ret = true;
1211 } else {
1212 /* Routes we intend to cache in nexthop exception have
1213 * the DST_NOCACHE bit clear. However, if we are
1214 * unsuccessful at storing this route into the cache
1215 * we really need to set it.
1216 */
1217 rt->dst.flags |= DST_NOCACHE;
1194 } 1218 }
1195 if (gw) { 1219 spin_unlock_bh(&fnhe_lock);
1196 rt->rt_flags |= RTCF_REDIRECTED;
1197 rt->rt_gateway = gw;
1198 }
1199 fnhe->fnhe_stamp = jiffies;
1200}
1201 1220
1202static inline void rt_free(struct rtable *rt) 1221 return ret;
1203{
1204 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
1205} 1222}
1206 1223
1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1224static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1208{ 1225{
1209 struct rtable *orig, *prev, **p = &nh->nh_rth_output; 1226 struct rtable *orig, *prev, **p;
1210 1227 bool ret = true;
1211 if (rt_is_input_route(rt))
1212 p = &nh->nh_rth_input;
1213 1228
1229 if (rt_is_input_route(rt)) {
1230 p = (struct rtable **)&nh->nh_rth_input;
1231 } else {
1232 if (!nh->nh_pcpu_rth_output)
1233 goto nocache;
1234 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1235 }
1214 orig = *p; 1236 orig = *p;
1215 1237
1216 prev = cmpxchg(p, orig, rt); 1238 prev = cmpxchg(p, orig, rt);
@@ -1223,7 +1245,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1223 * unsuccessful at storing this route into the cache 1245 * unsuccessful at storing this route into the cache
1224 * we really need to set it. 1246 * we really need to set it.
1225 */ 1247 */
1248nocache:
1226 rt->dst.flags |= DST_NOCACHE; 1249 rt->dst.flags |= DST_NOCACHE;
1250 ret = false;
1251 }
1252
1253 return ret;
1254}
1255
1256static DEFINE_SPINLOCK(rt_uncached_lock);
1257static LIST_HEAD(rt_uncached_list);
1258
1259static void rt_add_uncached_list(struct rtable *rt)
1260{
1261 spin_lock_bh(&rt_uncached_lock);
1262 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1263 spin_unlock_bh(&rt_uncached_lock);
1264}
1265
1266static void ipv4_dst_destroy(struct dst_entry *dst)
1267{
1268 struct rtable *rt = (struct rtable *) dst;
1269
1270 if (dst->flags & DST_NOCACHE) {
1271 spin_lock_bh(&rt_uncached_lock);
1272 list_del(&rt->rt_uncached);
1273 spin_unlock_bh(&rt_uncached_lock);
1274 }
1275}
1276
1277void rt_flush_dev(struct net_device *dev)
1278{
1279 if (!list_empty(&rt_uncached_list)) {
1280 struct net *net = dev_net(dev);
1281 struct rtable *rt;
1282
1283 spin_lock_bh(&rt_uncached_lock);
1284 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1285 if (rt->dst.dev != dev)
1286 continue;
1287 rt->dst.dev = net->loopback_dev;
1288 dev_hold(rt->dst.dev);
1289 dev_put(dev);
1290 }
1291 spin_unlock_bh(&rt_uncached_lock);
1227 } 1292 }
1228} 1293}
1229 1294
@@ -1239,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1239 struct fib_nh_exception *fnhe, 1304 struct fib_nh_exception *fnhe,
1240 struct fib_info *fi, u16 type, u32 itag) 1305 struct fib_info *fi, u16 type, u32 itag)
1241{ 1306{
1307 bool cached = false;
1308
1242 if (fi) { 1309 if (fi) {
1243 struct fib_nh *nh = &FIB_RES_NH(*res); 1310 struct fib_nh *nh = &FIB_RES_NH(*res);
1244 1311
1245 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) 1312 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1246 rt->rt_gateway = nh->nh_gw; 1313 rt->rt_gateway = nh->nh_gw;
1247 if (unlikely(fnhe))
1248 rt_bind_exception(rt, fnhe, daddr);
1249 dst_init_metrics(&rt->dst, fi->fib_metrics, true); 1314 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1250#ifdef CONFIG_IP_ROUTE_CLASSID 1315#ifdef CONFIG_IP_ROUTE_CLASSID
1251 rt->dst.tclassid = nh->nh_tclassid; 1316 rt->dst.tclassid = nh->nh_tclassid;
1252#endif 1317#endif
1253 if (!(rt->dst.flags & DST_NOCACHE)) 1318 if (unlikely(fnhe))
1254 rt_cache_route(nh, rt); 1319 cached = rt_bind_exception(rt, fnhe, daddr);
1320 else if (!(rt->dst.flags & DST_NOCACHE))
1321 cached = rt_cache_route(nh, rt);
1255 } 1322 }
1323 if (unlikely(!cached))
1324 rt_add_uncached_list(rt);
1256 1325
1257#ifdef CONFIG_IP_ROUTE_CLASSID 1326#ifdef CONFIG_IP_ROUTE_CLASSID
1258#ifdef CONFIG_IP_MULTIPLE_TABLES 1327#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1319,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1319 rth->rt_iif = 0; 1388 rth->rt_iif = 0;
1320 rth->rt_pmtu = 0; 1389 rth->rt_pmtu = 0;
1321 rth->rt_gateway = 0; 1390 rth->rt_gateway = 0;
1391 INIT_LIST_HEAD(&rth->rt_uncached);
1322 if (our) { 1392 if (our) {
1323 rth->dst.input= ip_local_deliver; 1393 rth->dst.input= ip_local_deliver;
1324 rth->rt_flags |= RTCF_LOCAL; 1394 rth->rt_flags |= RTCF_LOCAL;
@@ -1420,7 +1490,7 @@ static int __mkroute_input(struct sk_buff *skb,
1420 do_cache = false; 1490 do_cache = false;
1421 if (res->fi) { 1491 if (res->fi) {
1422 if (!itag) { 1492 if (!itag) {
1423 rth = FIB_RES_NH(*res).nh_rth_input; 1493 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1424 if (rt_cache_valid(rth)) { 1494 if (rt_cache_valid(rth)) {
1425 skb_dst_set_noref(skb, &rth->dst); 1495 skb_dst_set_noref(skb, &rth->dst);
1426 goto out; 1496 goto out;
@@ -1444,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb,
1444 rth->rt_iif = 0; 1514 rth->rt_iif = 0;
1445 rth->rt_pmtu = 0; 1515 rth->rt_pmtu = 0;
1446 rth->rt_gateway = 0; 1516 rth->rt_gateway = 0;
1517 INIT_LIST_HEAD(&rth->rt_uncached);
1447 1518
1448 rth->dst.input = ip_forward; 1519 rth->dst.input = ip_forward;
1449 rth->dst.output = ip_output; 1520 rth->dst.output = ip_output;
@@ -1582,7 +1653,7 @@ local_input:
1582 do_cache = false; 1653 do_cache = false;
1583 if (res.fi) { 1654 if (res.fi) {
1584 if (!itag) { 1655 if (!itag) {
1585 rth = FIB_RES_NH(res).nh_rth_input; 1656 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
1586 if (rt_cache_valid(rth)) { 1657 if (rt_cache_valid(rth)) {
1587 skb_dst_set_noref(skb, &rth->dst); 1658 skb_dst_set_noref(skb, &rth->dst);
1588 err = 0; 1659 err = 0;
@@ -1610,6 +1681,7 @@ local_input:
1610 rth->rt_iif = 0; 1681 rth->rt_iif = 0;
1611 rth->rt_pmtu = 0; 1682 rth->rt_pmtu = 0;
1612 rth->rt_gateway = 0; 1683 rth->rt_gateway = 0;
1684 INIT_LIST_HEAD(&rth->rt_uncached);
1613 if (res.type == RTN_UNREACHABLE) { 1685 if (res.type == RTN_UNREACHABLE) {
1614 rth->dst.input= ip_error; 1686 rth->dst.input= ip_error;
1615 rth->dst.error= -err; 1687 rth->dst.error= -err;
@@ -1748,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1748 1820
1749 fnhe = NULL; 1821 fnhe = NULL;
1750 if (fi) { 1822 if (fi) {
1823 struct rtable __rcu **prth;
1824
1751 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); 1825 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1752 if (!fnhe) { 1826 if (fnhe)
1753 rth = FIB_RES_NH(*res).nh_rth_output; 1827 prth = &fnhe->fnhe_rth;
1754 if (rt_cache_valid(rth)) { 1828 else
1755 dst_hold(&rth->dst); 1829 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
1756 return rth; 1830 rth = rcu_dereference(*prth);
1757 } 1831 if (rt_cache_valid(rth)) {
1832 dst_hold(&rth->dst);
1833 return rth;
1758 } 1834 }
1759 } 1835 }
1760 rth = rt_dst_alloc(dev_out, 1836 rth = rt_dst_alloc(dev_out,
1761 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1837 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1762 IN_DEV_CONF_GET(in_dev, NOXFRM), 1838 IN_DEV_CONF_GET(in_dev, NOXFRM),
1763 fi && !fnhe); 1839 fi);
1764 if (!rth) 1840 if (!rth)
1765 return ERR_PTR(-ENOBUFS); 1841 return ERR_PTR(-ENOBUFS);
1766 1842
@@ -1773,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1773 rth->rt_iif = orig_oif ? : 0; 1849 rth->rt_iif = orig_oif ? : 0;
1774 rth->rt_pmtu = 0; 1850 rth->rt_pmtu = 0;
1775 rth->rt_gateway = 0; 1851 rth->rt_gateway = 0;
1852 INIT_LIST_HEAD(&rth->rt_uncached);
1776 1853
1777 RT_CACHE_STAT_INC(out_slow_tot); 1854 RT_CACHE_STAT_INC(out_slow_tot);
1778 1855
@@ -2052,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2052 rt->rt_type = ort->rt_type; 2129 rt->rt_type = ort->rt_type;
2053 rt->rt_gateway = ort->rt_gateway; 2130 rt->rt_gateway = ort->rt_gateway;
2054 2131
2132 INIT_LIST_HEAD(&rt->rt_uncached);
2133
2055 dst_free(new); 2134 dst_free(new);
2056 } 2135 }
2057 2136