aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Hannes Frederic Sowa <hannes@stressinduktion.org> 2014-01-09 04:01:15 -0500
committer David S. Miller <davem@davemloft.net> 2014-01-13 14:22:54 -0500
commit f87c10a8aa1e82498c42d0335524d6ae7cf5a52b (patch)
tree 85117fae4f0f25831d4232155b47f3f6553faf27 /net
parent 6c76a07a711165a31332aa619215f961b115071d (diff)
ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing
While forwarding we should not use the protocol path mtu to calculate the mtu for a forwarded packet but instead use the interface mtu.

We mark forwarded skbs in ip_forward with IPSKB_FORWARDED, which was introduced for multicast forwarding. But as it does not conflict with our usage in the unicast code path it is perfect for reuse.

I moved the functions ip_sk_accept_pmtu, ip_sk_use_pmtu and ip_skb_dst_mtu along with the new ip_dst_mtu_maybe_forward to net/ip.h to fix circular dependencies because of IPSKB_FORWARDED.

Because someone might have written software which probes destinations manually and expects the kernel to honour those path mtus, I introduced a new per-namespace "ip_forward_use_pmtu" knob so someone can disable this new behaviour. We also still use mtus which are locked on a route for forwarding.

The reason for this change is that path mtu information can be injected into the kernel via e.g. the icmp_err protocol handler without verification of local sockets. As such, this could cause the IPv4 forwarding path to wrongfully emit fragmentation needed notifications or start to fragment packets along a path.

Tunnel and ipsec output paths clear IPCB again, thus IPSKB_FORWARDED won't be set and further fragmentation logic will use the path mtu to determine the fragmentation size. They also recheck packet size with the help of path mtu discovery and report appropriate errors.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: John Heffner <johnwheffner@gmail.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/ip_forward.c 7
-rw-r--r-- net/ipv4/ip_output.c 8
-rw-r--r-- net/ipv4/route.c 3
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 7
4 files changed, 17 insertions, 8 deletions
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 694de3b7aebf..e9f1217a8afd 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -54,6 +54,7 @@ static int ip_forward_finish(struct sk_buff *skb)
54 54
55int ip_forward(struct sk_buff *skb) 55int ip_forward(struct sk_buff *skb)
56{ 56{
57 u32 mtu;
57 struct iphdr *iph; /* Our header */ 58 struct iphdr *iph; /* Our header */
58 struct rtable *rt; /* Route we use */ 59 struct rtable *rt; /* Route we use */
59 struct ip_options *opt = &(IPCB(skb)->opt); 60 struct ip_options *opt = &(IPCB(skb)->opt);
@@ -88,11 +89,13 @@ int ip_forward(struct sk_buff *skb)
88 if (opt->is_strictroute && rt->rt_uses_gateway) 89 if (opt->is_strictroute && rt->rt_uses_gateway)
89 goto sr_failed; 90 goto sr_failed;
90 91
91 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && 92 IPCB(skb)->flags |= IPSKB_FORWARDED;
93 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
94 if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
92 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { 95 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
93 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); 96 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
94 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 97 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
95 htonl(dst_mtu(&rt->dst))); 98 htonl(mtu));
96 goto drop; 99 goto drop;
97 } 100 }
98 101
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index df184616493f..9a78804cfe9c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -449,6 +449,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
449 __be16 not_last_frag; 449 __be16 not_last_frag;
450 struct rtable *rt = skb_rtable(skb); 450 struct rtable *rt = skb_rtable(skb);
451 int err = 0; 451 int err = 0;
452 bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
452 453
453 dev = rt->dst.dev; 454 dev = rt->dst.dev;
454 455
@@ -458,12 +459,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
458 459
459 iph = ip_hdr(skb); 460 iph = ip_hdr(skb);
460 461
462 mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding);
461 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || 463 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
462 (IPCB(skb)->frag_max_size && 464 (IPCB(skb)->frag_max_size &&
463 IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { 465 IPCB(skb)->frag_max_size > mtu))) {
464 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 466 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
465 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 467 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
466 htonl(ip_skb_dst_mtu(skb))); 468 htonl(mtu));
467 kfree_skb(skb); 469 kfree_skb(skb);
468 return -EMSGSIZE; 470 return -EMSGSIZE;
469 } 471 }
@@ -473,7 +475,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
473 */ 475 */
474 476
475 hlen = iph->ihl * 4; 477 hlen = iph->ihl * 4;
476 mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ 478 mtu = mtu - hlen; /* Size of data space */
477#ifdef CONFIG_BRIDGE_NETFILTER 479#ifdef CONFIG_BRIDGE_NETFILTER
478 if (skb->nf_bridge) 480 if (skb->nf_bridge)
479 mtu -= nf_bridge_mtu_reduction(skb); 481 mtu -= nf_bridge_mtu_reduction(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f8da28278014..25071b48921c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,9 +112,6 @@
112#define RT_FL_TOS(oldflp4) \ 112#define RT_FL_TOS(oldflp4) \
113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
114 114
115/* IPv4 datagram length is stored into 16bit field (tot_len) */
116#define IP_MAX_MTU 0xFFFF
117
118#define RT_GC_TIMEOUT (300*HZ) 115#define RT_GC_TIMEOUT (300*HZ)
119 116
120static int ip_rt_max_size; 117static int ip_rt_max_size;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1d2480ac2bb6..44eba052b43d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -831,6 +831,13 @@ static struct ctl_table ipv4_net_table[] = {
831 .mode = 0644, 831 .mode = 0644,
832 .proc_handler = proc_dointvec 832 .proc_handler = proc_dointvec
833 }, 833 },
834 {
835 .procname = "ip_forward_use_pmtu",
836 .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
837 .maxlen = sizeof(int),
838 .mode = 0644,
839 .proc_handler = proc_dointvec,
840 },
834 { } 841 { }
835}; 842};
836 843