aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hannes Frederic Sowa <hannes@stressinduktion.org> 2013-11-04 20:24:17 -0500
committer David S. Miller <davem@davemloft.net> 2013-11-05 21:52:27 -0500
commit 482fc6094afad572a4ea1fd722e7b11ca72022a0 (patch)
tree 8ea3f61751718374f7d84b64e82ab46b93f89a57
parent b915550179d9df122fb781dc87d737ca92058b3c (diff)
ipv4: introduce new IP_MTU_DISCOVER mode IP_PMTUDISC_INTERFACE
Sockets marked with IP_PMTUDISC_INTERFACE won't do path mtu discovery, their sockets won't accept and install new path mtu information and they will always use the interface mtu for outgoing packets. It is guaranteed that the packet is not fragmented locally. But we won't set the DF-Flag on the outgoing frames. Florian Weimer had the idea to use this flag to ensure DNS servers are never generating outgoing fragments. They may well be fragmented on the path, but the server never stores or uses path mtu values, which could well be forged in an attack. (The root of the problem with path MTU discovery is that there is no reliable way to authenticate ICMP Fragmentation Needed But DF Set messages because they are sent from intermediate routers with their source addresses, and the ICMP payload will not always contain sufficient information to identify a flow.) Recent research in the DNS community showed that it is possible to implement an attack where DNS cache poisoning is feasible by spoofing fragments. This work was done by Amir Herzberg and Haya Shulman: <https://sites.google.com/site/hayashulman/files/fragmentation-poisoning.pdf> This issue was previously discussed among the DNS community, e.g. <http://www.ietf.org/mail-archive/web/dnsext/current/msg01204.html>, without leading to fixes. This patch depends on the patch "ipv4: fix DO and PROBE pmtu mode regarding local fragmentation with UFO/CORK" for the enforcement of the non-fragmentable checks. If other users than ip_append_page/data should use this semantic too, we have to add a new flag to IPCB(skb)->flags to suppress local fragmentation and check for this in ip_finish_output. Many thanks to Florian Weimer for the idea and feedback while implementing this patch. Cc: David S. Miller <davem@davemloft.net> Suggested-by: Florian Weimer <fweimer@redhat.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/route.h 16
-rw-r--r-- include/uapi/linux/in.h 5
-rw-r--r-- net/dccp/ipv4.c 1
-rw-r--r-- net/ipv4/ip_output.c 8
-rw-r--r-- net/ipv4/ip_sockglue.c 2
-rw-r--r-- net/ipv4/route.c 4
-rw-r--r-- net/ipv4/tcp_ipv4.c 1
7 files changed, 28 insertions, 9 deletions
diff --git a/include/net/route.h b/include/net/route.h
index dd4ae0029fd8..f68c167280a7 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -313,12 +313,20 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
313 return hoplimit; 313 return hoplimit;
314} 314}
315 315
316static inline int ip_skb_dst_mtu(struct sk_buff *skb) 316static inline bool ip_sk_accept_pmtu(const struct sock *sk)
317{ 317{
318 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; 318 return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE;
319}
319 320
320 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? 321static inline bool ip_sk_use_pmtu(const struct sock *sk)
321 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); 322{
323 return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
324}
325
326static inline int ip_skb_dst_mtu(const struct sk_buff *skb)
327{
328 return (!skb->sk || ip_sk_use_pmtu(skb->sk)) ?
329 dst_mtu(skb_dst(skb)) : skb_dst(skb)->dev->mtu;
322} 330}
323 331
324#endif /* _ROUTE_H */ 332#endif /* _ROUTE_H */
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index f9e8e496ae5d..393c5de09d42 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -115,6 +115,11 @@ struct in_addr {
115#define IP_PMTUDISC_WANT 1 /* Use per route hints */ 115#define IP_PMTUDISC_WANT 1 /* Use per route hints */
116#define IP_PMTUDISC_DO 2 /* Always DF */ 116#define IP_PMTUDISC_DO 2 /* Always DF */
117#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ 117#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
118/* Always use interface mtu (ignores dst pmtu) but don't set DF flag.
119 * Also incoming ICMP frag_needed notifications will be ignored on
120 * this socket to prevent accepting spoofed ones.
121 */
122#define IP_PMTUDISC_INTERFACE 4
118 123
119#define IP_MULTICAST_IF 32 124#define IP_MULTICAST_IF 32
120#define IP_MULTICAST_TTL 33 125#define IP_MULTICAST_TTL 33
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 720c36225ed9..d9f65fc66db5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -174,6 +174,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
174 mtu = dst_mtu(dst); 174 mtu = dst_mtu(dst);
175 175
176 if (inet->pmtudisc != IP_PMTUDISC_DONT && 176 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
177 ip_sk_accept_pmtu(sk) &&
177 inet_csk(sk)->icsk_pmtu_cookie > mtu) { 178 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
178 dccp_sync_mss(sk, mtu); 179 dccp_sync_mss(sk, mtu);
179 180
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 51be64e18e32..912402752f2f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1037,7 +1037,6 @@ error:
1037static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, 1037static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1038 struct ipcm_cookie *ipc, struct rtable **rtp) 1038 struct ipcm_cookie *ipc, struct rtable **rtp)
1039{ 1039{
1040 struct inet_sock *inet = inet_sk(sk);
1041 struct ip_options_rcu *opt; 1040 struct ip_options_rcu *opt;
1042 struct rtable *rt; 1041 struct rtable *rt;
1043 1042
@@ -1063,8 +1062,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1063 * We steal reference to this route, caller should not release it 1062 * We steal reference to this route, caller should not release it
1064 */ 1063 */
1065 *rtp = NULL; 1064 *rtp = NULL;
1066 cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? 1065 cork->fragsize = ip_sk_use_pmtu(sk) ?
1067 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1066 dst_mtu(&rt->dst) : rt->dst.dev->mtu;
1068 cork->dst = &rt->dst; 1067 cork->dst = &rt->dst;
1069 cork->length = 0; 1068 cork->length = 0;
1070 cork->ttl = ipc->ttl; 1069 cork->ttl = ipc->ttl;
@@ -1315,7 +1314,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1315 /* DF bit is set when we want to see DF on outgoing frames. 1314 /* DF bit is set when we want to see DF on outgoing frames.
1316 * If local_df is set too, we still allow to fragment this frame 1315 * If local_df is set too, we still allow to fragment this frame
1317 * locally. */ 1316 * locally. */
1318 if (inet->pmtudisc >= IP_PMTUDISC_DO || 1317 if (inet->pmtudisc == IP_PMTUDISC_DO ||
1318 inet->pmtudisc == IP_PMTUDISC_PROBE ||
1319 (skb->len <= dst_mtu(&rt->dst) && 1319 (skb->len <= dst_mtu(&rt->dst) &&
1320 ip_dont_fragment(sk, &rt->dst))) 1320 ip_dont_fragment(sk, &rt->dst)))
1321 df = htons(IP_DF); 1321 df = htons(IP_DF);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0626f2cb192e..3f858266fa7e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -627,7 +627,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
627 inet->nodefrag = val ? 1 : 0; 627 inet->nodefrag = val ? 1 : 0;
628 break; 628 break;
629 case IP_MTU_DISCOVER: 629 case IP_MTU_DISCOVER:
630 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) 630 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE)
631 goto e_inval; 631 goto e_inval;
632 inet->pmtudisc = val; 632 inet->pmtudisc = val;
633 break; 633 break;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d2d325382b13..f428935c50db 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1036,6 +1036,10 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1036 bool new = false; 1036 bool new = false;
1037 1037
1038 bh_lock_sock(sk); 1038 bh_lock_sock(sk);
1039
1040 if (!ip_sk_accept_pmtu(sk))
1041 goto out;
1042
1039 rt = (struct rtable *) __sk_dst_get(sk); 1043 rt = (struct rtable *) __sk_dst_get(sk);
1040 1044
1041 if (sock_owned_by_user(sk) || !rt) { 1045 if (sock_owned_by_user(sk) || !rt) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 300ab2c93f29..14bba8a1c5a7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -288,6 +288,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
288 mtu = dst_mtu(dst); 288 mtu = dst_mtu(dst);
289 289
290 if (inet->pmtudisc != IP_PMTUDISC_DONT && 290 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
291 ip_sk_accept_pmtu(sk) &&
291 inet_csk(sk)->icsk_pmtu_cookie > mtu) { 292 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
292 tcp_sync_mss(sk, mtu); 293 tcp_sync_mss(sk, mtu);
293 294