aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Sabrina Dubroca <sd@queasysnail.net> 2018-03-14 05:21:14 -0400
committer David S. Miller <davem@davemloft.net> 2018-03-14 13:37:36 -0400
commit d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 (patch)
tree 27f31c7dc279232da5b67a307e8a2fde449ac098 /net
parent 16c2e4db832da4b883b3ee8b9dc32d1ca115759a (diff)
ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu
Prior to the rework of PMTU information storage in commit 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we would disable setting the DF flag on packets by locking the MTU metric, and set the PMTU to net.ipv4.route.min_pmtu.

Since then, we don't disable DF, and set PMTU to net.ipv4.route.min_pmtu, so the intermediate router that has this link with a small MTU will have to drop the packets.

This patch reestablishes pre-2.6.39 behavior by splitting rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu. rt_mtu_locked indicates that we shouldn't set the DF bit on that path, and is checked in ip_dont_fragment().

One possible workaround is to set net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest MTU encountered.

Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/route.c 26
-rw-r--r-- net/ipv4/xfrm4_policy.c 1
2 files changed, 20 insertions, 7 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f9dbb8cb66bf..299e247b2032 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
634static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) 634static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
635{ 635{
636 rt->rt_pmtu = fnhe->fnhe_pmtu; 636 rt->rt_pmtu = fnhe->fnhe_pmtu;
637 rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
637 rt->dst.expires = fnhe->fnhe_expires; 638 rt->dst.expires = fnhe->fnhe_expires;
638 639
639 if (fnhe->fnhe_gw) { 640 if (fnhe->fnhe_gw) {
@@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
644} 645}
645 646
646static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, 647static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
647 u32 pmtu, unsigned long expires) 648 u32 pmtu, bool lock, unsigned long expires)
648{ 649{
649 struct fnhe_hash_bucket *hash; 650 struct fnhe_hash_bucket *hash;
650 struct fib_nh_exception *fnhe; 651 struct fib_nh_exception *fnhe;
@@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
681 fnhe->fnhe_genid = genid; 682 fnhe->fnhe_genid = genid;
682 if (gw) 683 if (gw)
683 fnhe->fnhe_gw = gw; 684 fnhe->fnhe_gw = gw;
684 if (pmtu) 685 if (pmtu) {
685 fnhe->fnhe_pmtu = pmtu; 686 fnhe->fnhe_pmtu = pmtu;
687 fnhe->fnhe_mtu_locked = lock;
688 }
686 fnhe->fnhe_expires = max(1UL, expires); 689 fnhe->fnhe_expires = max(1UL, expires);
687 /* Update all cached dsts too */ 690 /* Update all cached dsts too */
688 rt = rcu_dereference(fnhe->fnhe_rth_input); 691 rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
706 fnhe->fnhe_daddr = daddr; 709 fnhe->fnhe_daddr = daddr;
707 fnhe->fnhe_gw = gw; 710 fnhe->fnhe_gw = gw;
708 fnhe->fnhe_pmtu = pmtu; 711 fnhe->fnhe_pmtu = pmtu;
712 fnhe->fnhe_mtu_locked = lock;
709 fnhe->fnhe_expires = expires; 713 fnhe->fnhe_expires = expires;
710 714
711 /* Exception created; mark the cached routes for the nexthop 715 /* Exception created; mark the cached routes for the nexthop
@@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
787 struct fib_nh *nh = &FIB_RES_NH(res); 791 struct fib_nh *nh = &FIB_RES_NH(res);
788 792
789 update_or_create_fnhe(nh, fl4->daddr, new_gw, 793 update_or_create_fnhe(nh, fl4->daddr, new_gw,
790 0, jiffies + ip_rt_gc_timeout); 794 0, false,
795 jiffies + ip_rt_gc_timeout);
791 } 796 }
792 if (kill_route) 797 if (kill_route)
793 rt->dst.obsolete = DST_OBSOLETE_KILL; 798 rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
1009{ 1014{
1010 struct dst_entry *dst = &rt->dst; 1015 struct dst_entry *dst = &rt->dst;
1011 struct fib_result res; 1016 struct fib_result res;
1017 bool lock = false;
1012 1018
1013 if (dst_metric_locked(dst, RTAX_MTU)) 1019 if (ip_mtu_locked(dst))
1014 return; 1020 return;
1015 1021
1016 if (ipv4_mtu(dst) < mtu) 1022 if (ipv4_mtu(dst) < mtu)
1017 return; 1023 return;
1018 1024
1019 if (mtu < ip_rt_min_pmtu) 1025 if (mtu < ip_rt_min_pmtu) {
1026 lock = true;
1020 mtu = ip_rt_min_pmtu; 1027 mtu = ip_rt_min_pmtu;
1028 }
1021 1029
1022 if (rt->rt_pmtu == mtu && 1030 if (rt->rt_pmtu == mtu &&
1023 time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) 1031 time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
1027 if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { 1035 if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
1028 struct fib_nh *nh = &FIB_RES_NH(res); 1036 struct fib_nh *nh = &FIB_RES_NH(res);
1029 1037
1030 update_or_create_fnhe(nh, fl4->daddr, 0, mtu, 1038 update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
1031 jiffies + ip_rt_mtu_expires); 1039 jiffies + ip_rt_mtu_expires);
1032 } 1040 }
1033 rcu_read_unlock(); 1041 rcu_read_unlock();
@@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
1280 1288
1281 mtu = READ_ONCE(dst->dev->mtu); 1289 mtu = READ_ONCE(dst->dev->mtu);
1282 1290
1283 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { 1291 if (unlikely(ip_mtu_locked(dst))) {
1284 if (rt->rt_uses_gateway && mtu > 576) 1292 if (rt->rt_uses_gateway && mtu > 576)
1285 mtu = 576; 1293 mtu = 576;
1286 } 1294 }
@@ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
1521 rt->rt_is_input = 0; 1529 rt->rt_is_input = 0;
1522 rt->rt_iif = 0; 1530 rt->rt_iif = 0;
1523 rt->rt_pmtu = 0; 1531 rt->rt_pmtu = 0;
1532 rt->rt_mtu_locked = 0;
1524 rt->rt_gateway = 0; 1533 rt->rt_gateway = 0;
1525 rt->rt_uses_gateway = 0; 1534 rt->rt_uses_gateway = 0;
1526 rt->rt_table_id = 0; 1535 rt->rt_table_id = 0;
@@ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2546 rt->rt_is_input = ort->rt_is_input; 2555 rt->rt_is_input = ort->rt_is_input;
2547 rt->rt_iif = ort->rt_iif; 2556 rt->rt_iif = ort->rt_iif;
2548 rt->rt_pmtu = ort->rt_pmtu; 2557 rt->rt_pmtu = ort->rt_pmtu;
2558 rt->rt_mtu_locked = ort->rt_mtu_locked;
2549 2559
2550 rt->rt_genid = rt_genid_ipv4(net); 2560 rt->rt_genid = rt_genid_ipv4(net);
2551 rt->rt_flags = ort->rt_flags; 2561 rt->rt_flags = ort->rt_flags;
@@ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
2648 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 2658 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2649 if (rt->rt_pmtu && expires) 2659 if (rt->rt_pmtu && expires)
2650 metrics[RTAX_MTU - 1] = rt->rt_pmtu; 2660 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2661 if (rt->rt_mtu_locked && expires)
2662 metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
2651 if (rtnetlink_put_metrics(skb, metrics) < 0) 2663 if (rtnetlink_put_metrics(skb, metrics) < 0)
2652 goto nla_put_failure; 2664 goto nla_put_failure;
2653 2665
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8d33f7b311f4..fbebda67ac1b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
100 xdst->u.rt.rt_gateway = rt->rt_gateway; 100 xdst->u.rt.rt_gateway = rt->rt_gateway;
101 xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; 101 xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
102 xdst->u.rt.rt_pmtu = rt->rt_pmtu; 102 xdst->u.rt.rt_pmtu = rt->rt_pmtu;
103 xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
103 xdst->u.rt.rt_table_id = rt->rt_table_id; 104 xdst->u.rt.rt_table_id = rt->rt_table_id;
104 INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); 105 INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
105 rt_add_uncached_list(&xdst->u.rt); 106 rt_add_uncached_list(&xdst->u.rt);