diff options
author | Sabrina Dubroca <sd@queasysnail.net> | 2018-03-14 05:21:14 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-14 13:37:36 -0400 |
commit | d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 (patch) | |
tree | 27f31c7dc279232da5b67a307e8a2fde449ac098 /net | |
parent | 16c2e4db832da4b883b3ee8b9dc32d1ca115759a (diff) |
ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu
Prior to the rework of PMTU information storage in commit
2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."),
when a PMTU event advertising a PMTU smaller than
net.ipv4.route.min_pmtu was received, we would disable setting the DF
flag on packets by locking the MTU metric, and set the PMTU to
net.ipv4.route.min_pmtu.
Since then, we no longer disable DF but still set the PMTU to
net.ipv4.route.min_pmtu, so packets larger than the advertised PMTU keep
being sent with DF set, and the intermediate router that has this link
with a small MTU will have to drop the packets.
This patch reestablishes pre-2.6.39 behavior by splitting
rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu.
rt_mtu_locked indicates that we shouldn't set the DF bit on that path,
and is checked in ip_dont_fragment().
On kernels without this patch, one possible workaround is to set
net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest
MTU encountered.
Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/route.c | 26 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 |
2 files changed, 20 insertions, 7 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f9dbb8cb66bf..299e247b2032 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr) | |||
634 | static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) | 634 | static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) |
635 | { | 635 | { |
636 | rt->rt_pmtu = fnhe->fnhe_pmtu; | 636 | rt->rt_pmtu = fnhe->fnhe_pmtu; |
637 | rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; | ||
637 | rt->dst.expires = fnhe->fnhe_expires; | 638 | rt->dst.expires = fnhe->fnhe_expires; |
638 | 639 | ||
639 | if (fnhe->fnhe_gw) { | 640 | if (fnhe->fnhe_gw) { |
@@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh | |||
644 | } | 645 | } |
645 | 646 | ||
646 | static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | 647 | static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, |
647 | u32 pmtu, unsigned long expires) | 648 | u32 pmtu, bool lock, unsigned long expires) |
648 | { | 649 | { |
649 | struct fnhe_hash_bucket *hash; | 650 | struct fnhe_hash_bucket *hash; |
650 | struct fib_nh_exception *fnhe; | 651 | struct fib_nh_exception *fnhe; |
@@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
681 | fnhe->fnhe_genid = genid; | 682 | fnhe->fnhe_genid = genid; |
682 | if (gw) | 683 | if (gw) |
683 | fnhe->fnhe_gw = gw; | 684 | fnhe->fnhe_gw = gw; |
684 | if (pmtu) | 685 | if (pmtu) { |
685 | fnhe->fnhe_pmtu = pmtu; | 686 | fnhe->fnhe_pmtu = pmtu; |
687 | fnhe->fnhe_mtu_locked = lock; | ||
688 | } | ||
686 | fnhe->fnhe_expires = max(1UL, expires); | 689 | fnhe->fnhe_expires = max(1UL, expires); |
687 | /* Update all cached dsts too */ | 690 | /* Update all cached dsts too */ |
688 | rt = rcu_dereference(fnhe->fnhe_rth_input); | 691 | rt = rcu_dereference(fnhe->fnhe_rth_input); |
@@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
706 | fnhe->fnhe_daddr = daddr; | 709 | fnhe->fnhe_daddr = daddr; |
707 | fnhe->fnhe_gw = gw; | 710 | fnhe->fnhe_gw = gw; |
708 | fnhe->fnhe_pmtu = pmtu; | 711 | fnhe->fnhe_pmtu = pmtu; |
712 | fnhe->fnhe_mtu_locked = lock; | ||
709 | fnhe->fnhe_expires = expires; | 713 | fnhe->fnhe_expires = expires; |
710 | 714 | ||
711 | /* Exception created; mark the cached routes for the nexthop | 715 | /* Exception created; mark the cached routes for the nexthop |
@@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow | |||
787 | struct fib_nh *nh = &FIB_RES_NH(res); | 791 | struct fib_nh *nh = &FIB_RES_NH(res); |
788 | 792 | ||
789 | update_or_create_fnhe(nh, fl4->daddr, new_gw, | 793 | update_or_create_fnhe(nh, fl4->daddr, new_gw, |
790 | 0, jiffies + ip_rt_gc_timeout); | 794 | 0, false, |
795 | jiffies + ip_rt_gc_timeout); | ||
791 | } | 796 | } |
792 | if (kill_route) | 797 | if (kill_route) |
793 | rt->dst.obsolete = DST_OBSOLETE_KILL; | 798 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
@@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
1009 | { | 1014 | { |
1010 | struct dst_entry *dst = &rt->dst; | 1015 | struct dst_entry *dst = &rt->dst; |
1011 | struct fib_result res; | 1016 | struct fib_result res; |
1017 | bool lock = false; | ||
1012 | 1018 | ||
1013 | if (dst_metric_locked(dst, RTAX_MTU)) | 1019 | if (ip_mtu_locked(dst)) |
1014 | return; | 1020 | return; |
1015 | 1021 | ||
1016 | if (ipv4_mtu(dst) < mtu) | 1022 | if (ipv4_mtu(dst) < mtu) |
1017 | return; | 1023 | return; |
1018 | 1024 | ||
1019 | if (mtu < ip_rt_min_pmtu) | 1025 | if (mtu < ip_rt_min_pmtu) { |
1026 | lock = true; | ||
1020 | mtu = ip_rt_min_pmtu; | 1027 | mtu = ip_rt_min_pmtu; |
1028 | } | ||
1021 | 1029 | ||
1022 | if (rt->rt_pmtu == mtu && | 1030 | if (rt->rt_pmtu == mtu && |
1023 | time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) | 1031 | time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) |
@@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
1027 | if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { | 1035 | if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { |
1028 | struct fib_nh *nh = &FIB_RES_NH(res); | 1036 | struct fib_nh *nh = &FIB_RES_NH(res); |
1029 | 1037 | ||
1030 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, | 1038 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, |
1031 | jiffies + ip_rt_mtu_expires); | 1039 | jiffies + ip_rt_mtu_expires); |
1032 | } | 1040 | } |
1033 | rcu_read_unlock(); | 1041 | rcu_read_unlock(); |
@@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1280 | 1288 | ||
1281 | mtu = READ_ONCE(dst->dev->mtu); | 1289 | mtu = READ_ONCE(dst->dev->mtu); |
1282 | 1290 | ||
1283 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | 1291 | if (unlikely(ip_mtu_locked(dst))) { |
1284 | if (rt->rt_uses_gateway && mtu > 576) | 1292 | if (rt->rt_uses_gateway && mtu > 576) |
1285 | mtu = 576; | 1293 | mtu = 576; |
1286 | } | 1294 | } |
@@ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, | |||
1521 | rt->rt_is_input = 0; | 1529 | rt->rt_is_input = 0; |
1522 | rt->rt_iif = 0; | 1530 | rt->rt_iif = 0; |
1523 | rt->rt_pmtu = 0; | 1531 | rt->rt_pmtu = 0; |
1532 | rt->rt_mtu_locked = 0; | ||
1524 | rt->rt_gateway = 0; | 1533 | rt->rt_gateway = 0; |
1525 | rt->rt_uses_gateway = 0; | 1534 | rt->rt_uses_gateway = 0; |
1526 | rt->rt_table_id = 0; | 1535 | rt->rt_table_id = 0; |
@@ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2546 | rt->rt_is_input = ort->rt_is_input; | 2555 | rt->rt_is_input = ort->rt_is_input; |
2547 | rt->rt_iif = ort->rt_iif; | 2556 | rt->rt_iif = ort->rt_iif; |
2548 | rt->rt_pmtu = ort->rt_pmtu; | 2557 | rt->rt_pmtu = ort->rt_pmtu; |
2558 | rt->rt_mtu_locked = ort->rt_mtu_locked; | ||
2549 | 2559 | ||
2550 | rt->rt_genid = rt_genid_ipv4(net); | 2560 | rt->rt_genid = rt_genid_ipv4(net); |
2551 | rt->rt_flags = ort->rt_flags; | 2561 | rt->rt_flags = ort->rt_flags; |
@@ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, | |||
2648 | memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); | 2658 | memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); |
2649 | if (rt->rt_pmtu && expires) | 2659 | if (rt->rt_pmtu && expires) |
2650 | metrics[RTAX_MTU - 1] = rt->rt_pmtu; | 2660 | metrics[RTAX_MTU - 1] = rt->rt_pmtu; |
2661 | if (rt->rt_mtu_locked && expires) | ||
2662 | metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); | ||
2651 | if (rtnetlink_put_metrics(skb, metrics) < 0) | 2663 | if (rtnetlink_put_metrics(skb, metrics) < 0) |
2652 | goto nla_put_failure; | 2664 | goto nla_put_failure; |
2653 | 2665 | ||
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8d33f7b311f4..fbebda67ac1b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
100 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 100 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
101 | xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; | 101 | xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; |
102 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; | 102 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; |
103 | xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; | ||
103 | xdst->u.rt.rt_table_id = rt->rt_table_id; | 104 | xdst->u.rt.rt_table_id = rt->rt_table_id; |
104 | INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); | 105 | INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); |
105 | rt_add_uncached_list(&xdst->u.rt); | 106 | rt_add_uncached_list(&xdst->u.rt); |