aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hannes Frederic Sowa <hannes@stressinduktion.org> 2014-02-25 19:20:42 -0500
committer David S. Miller <davem@davemloft.net> 2014-02-26 15:51:00 -0500
commit 1b346576359c72bee34b1476b4fc63d77d37b314 (patch)
tree c39719e520d1771322b8e9e801c873587e05d237
parent 69647ce46a236a355a7a3096d793819a9bd7c1d3 (diff)
ipv4: yet another new IP_MTU_DISCOVER option IP_PMTUDISC_OMIT
IP_PMTUDISC_INTERFACE has a design error: because it does not allow the generation of fragments if the interface mtu is exceeded, it is very hard to make use of this option in already deployed name server software, for which I introduced this option. This patch adds yet another new IP_MTU_DISCOVER option that does not honor any path mtu information and does not accept new icmp notifications destined for the socket this option is enabled on. But we allow outgoing fragmentation in case the packet size exceeds the outgoing interface mtu. As such, this new option can be used as a drop-in replacement for IP_PMTUDISC_DONT, which is currently in use by most name server software, making the adoption of this option very smooth and easy. The original advantage of IP_PMTUDISC_INTERFACE is still maintained: ignoring incoming path MTU updates and not honoring discovered path MTUs in the output path. Fixes: 482fc6094afad5 ("ipv4: introduce new IP_MTU_DISCOVER mode IP_PMTUDISC_INTERFACE") Cc: Florian Weimer <fweimer@redhat.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip.h9
-rw-r--r--include/uapi/linux/in.h4
-rw-r--r--net/ipv4/ip_output.c9
-rw-r--r--net/ipv4/ip_sockglue.c2
4 files changed, 16 insertions, 8 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index 4aa781b7f609..b885d75cede4 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -266,7 +266,8 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
266 266
267static inline bool ip_sk_accept_pmtu(const struct sock *sk) 267static inline bool ip_sk_accept_pmtu(const struct sock *sk)
268{ 268{
269 return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE; 269 return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE &&
270 inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT;
270} 271}
271 272
272static inline bool ip_sk_use_pmtu(const struct sock *sk) 273static inline bool ip_sk_use_pmtu(const struct sock *sk)
@@ -274,6 +275,12 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk)
274 return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; 275 return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
275} 276}
276 277
278static inline bool ip_sk_local_df(const struct sock *sk)
279{
280 return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
281 inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
282}
283
277static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, 284static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
278 bool forwarding) 285 bool forwarding)
279{ 286{
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 393c5de09d42..c33a65e3d62c 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -120,6 +120,10 @@ struct in_addr {
120 * this socket to prevent accepting spoofed ones. 120 * this socket to prevent accepting spoofed ones.
121 */ 121 */
122#define IP_PMTUDISC_INTERFACE 4 122#define IP_PMTUDISC_INTERFACE 4
123/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
124 * fragmented if they exceed the interface mtu
125 */
126#define IP_PMTUDISC_OMIT 5
123 127
124#define IP_MULTICAST_IF 32 128#define IP_MULTICAST_IF 32
125#define IP_MULTICAST_TTL 33 129#define IP_MULTICAST_TTL 33
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 22aa11971ed1..e85445b2b102 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -824,8 +824,7 @@ static int __ip_append_data(struct sock *sk,
824 824
825 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 825 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
826 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 826 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
827 maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? 827 maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
828 mtu : 0xFFFF;
829 828
830 if (cork->length + length > maxnonfragsize - fragheaderlen) { 829 if (cork->length + length > maxnonfragsize - fragheaderlen) {
831 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, 830 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1148,8 +1147,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1148 1147
1149 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1148 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1150 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1149 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1151 maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? 1150 maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
1152 mtu : 0xFFFF;
1153 1151
1154 if (cork->length + size > maxnonfragsize - fragheaderlen) { 1152 if (cork->length + size > maxnonfragsize - fragheaderlen) {
1155 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, 1153 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1310,8 +1308,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1310 * to fragment the frame generated here. No matter, what transforms 1308 * to fragment the frame generated here. No matter, what transforms
1311 * how transforms change size of the packet, it will come out. 1309 * how transforms change size of the packet, it will come out.
1312 */ 1310 */
1313 if (inet->pmtudisc < IP_PMTUDISC_DO) 1311 skb->local_df = ip_sk_local_df(sk);
1314 skb->local_df = 1;
1315 1312
1316 /* DF bit is set when we want to see DF on outgoing frames. 1313 /* DF bit is set when we want to see DF on outgoing frames.
1317 * If local_df is set too, we still allow to fragment this frame 1314 * If local_df is set too, we still allow to fragment this frame
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0968b28c4cf3..64741b938632 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -643,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
643 inet->nodefrag = val ? 1 : 0; 643 inet->nodefrag = val ? 1 : 0;
644 break; 644 break;
645 case IP_MTU_DISCOVER: 645 case IP_MTU_DISCOVER:
646 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE) 646 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
647 goto e_inval; 647 goto e_inval;
648 inet->pmtudisc = val; 648 inet->pmtudisc = val;
649 break; 649 break;