diff options
author | John Heffner <jheffner@psc.edu> | 2007-04-20 18:53:27 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-04-26 01:29:10 -0400 |
commit | 628a5c561890a9a9a74dea017873530584aab06e (patch) | |
tree | f10edc4078c3f19487bbe3a902ecadda89273361 /net/ipv4 | |
parent | b881ef7603230550aa0150b22af94089f07ab00d (diff) |
[INET]: Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery). This is particularly useful for diagnostic utilities, like
traceroute/tracepath.
Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/ip_output.c | 20 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 2 |
2 files changed, 16 insertions, 6 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34606eff8a05..534650cad3a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
189 | return -EINVAL; | 189 | return -EINVAL; |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline int ip_skb_dst_mtu(struct sk_buff *skb) | ||
193 | { | ||
194 | struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; | ||
195 | |||
196 | return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? | ||
197 | skb->dst->dev->mtu : dst_mtu(skb->dst); | ||
198 | } | ||
199 | |||
192 | static inline int ip_finish_output(struct sk_buff *skb) | 200 | static inline int ip_finish_output(struct sk_buff *skb) |
193 | { | 201 | { |
194 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) | 202 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) |
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) | |||
198 | return dst_output(skb); | 206 | return dst_output(skb); |
199 | } | 207 | } |
200 | #endif | 208 | #endif |
201 | if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) | 209 | if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) |
202 | return ip_fragment(skb, ip_finish_output2); | 210 | return ip_fragment(skb, ip_finish_output2); |
203 | else | 211 | else |
204 | return ip_finish_output2(skb); | 212 | return ip_finish_output2(skb); |
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
422 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 430 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { |
423 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); | 431 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); |
424 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 432 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
425 | htonl(dst_mtu(&rt->u.dst))); | 433 | htonl(ip_skb_dst_mtu(skb))); |
426 | kfree_skb(skb); | 434 | kfree_skb(skb); |
427 | return -EMSGSIZE; | 435 | return -EMSGSIZE; |
428 | } | 436 | } |
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk, | |||
787 | inet->cork.addr = ipc->addr; | 795 | inet->cork.addr = ipc->addr; |
788 | } | 796 | } |
789 | dst_hold(&rt->u.dst); | 797 | dst_hold(&rt->u.dst); |
790 | inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); | 798 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
799 | rt->u.dst.dev->mtu : | ||
800 | dst_mtu(rt->u.dst.path); | ||
791 | inet->cork.rt = rt; | 801 | inet->cork.rt = rt; |
792 | inet->cork.length = 0; | 802 | inet->cork.length = 0; |
793 | sk->sk_sndmsg_page = NULL; | 803 | sk->sk_sndmsg_page = NULL; |
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) | |||
1203 | * to fragment the frame generated here. No matter, what transforms | 1213 | * to fragment the frame generated here. No matter, what transforms |
1204 | * how transforms change size of the packet, it will come out. | 1214 | * how transforms change size of the packet, it will come out. |
1205 | */ | 1215 | */ |
1206 | if (inet->pmtudisc != IP_PMTUDISC_DO) | 1216 | if (inet->pmtudisc < IP_PMTUDISC_DO) |
1207 | skb->local_df = 1; | 1217 | skb->local_df = 1; |
1208 | 1218 | ||
1209 | /* DF bit is set when we want to see DF on outgoing frames. | 1219 | /* DF bit is set when we want to see DF on outgoing frames. |
1210 | * If local_df is set too, we still allow to fragment this frame | 1220 | * If local_df is set too, we still allow to fragment this frame |
1211 | * locally. */ | 1221 | * locally. */ |
1212 | if (inet->pmtudisc == IP_PMTUDISC_DO || | 1222 | if (inet->pmtudisc >= IP_PMTUDISC_DO || |
1213 | (skb->len <= dst_mtu(&rt->u.dst) && | 1223 | (skb->len <= dst_mtu(&rt->u.dst) && |
1214 | ip_dont_fragment(sk, &rt->u.dst))) | 1224 | ip_dont_fragment(sk, &rt->u.dst))) |
1215 | df = htons(IP_DF); | 1225 | df = htons(IP_DF); |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c199d2311731..4d544573f48a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
542 | inet->hdrincl = val ? 1 : 0; | 542 | inet->hdrincl = val ? 1 : 0; |
543 | break; | 543 | break; |
544 | case IP_MTU_DISCOVER: | 544 | case IP_MTU_DISCOVER: |
545 | if (val<0 || val>2) | 545 | if (val<0 || val>3) |
546 | goto e_inval; | 546 | goto e_inval; |
547 | inet->pmtudisc = val; | 547 | inet->pmtudisc = val; |
548 | break; | 548 | break; |