diff options
author | John Heffner <jheffner@psc.edu> | 2007-04-20 18:53:27 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-04-26 01:29:10 -0400 |
commit | 628a5c561890a9a9a74dea017873530584aab06e (patch) | |
tree | f10edc4078c3f19487bbe3a902ecadda89273361 | |
parent | b881ef7603230550aa0150b22af94089f07ab00d (diff) |
[INET]: Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery). This is particularly useful for diagnostic utilities, like
traceroute/tracepath.
Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/in.h | 1 | ||||
-rw-r--r-- | include/linux/in6.h | 1 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 20 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 15 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 2 |
6 files changed, 31 insertions, 10 deletions
diff --git a/include/linux/in.h b/include/linux/in.h index 1912e7c0bc26..3975cbf52f20 100644 --- a/include/linux/in.h +++ b/include/linux/in.h | |||
@@ -83,6 +83,7 @@ struct in_addr { | |||
83 | #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ | 83 | #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ |
84 | #define IP_PMTUDISC_WANT 1 /* Use per route hints */ | 84 | #define IP_PMTUDISC_WANT 1 /* Use per route hints */ |
85 | #define IP_PMTUDISC_DO 2 /* Always DF */ | 85 | #define IP_PMTUDISC_DO 2 /* Always DF */ |
86 | #define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ | ||
86 | 87 | ||
87 | #define IP_MULTICAST_IF 32 | 88 | #define IP_MULTICAST_IF 32 |
88 | #define IP_MULTICAST_TTL 33 | 89 | #define IP_MULTICAST_TTL 33 |
diff --git a/include/linux/in6.h b/include/linux/in6.h index 4e8350ae8869..d559fac4a26d 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h | |||
@@ -179,6 +179,7 @@ struct in6_flowlabel_req | |||
179 | #define IPV6_PMTUDISC_DONT 0 | 179 | #define IPV6_PMTUDISC_DONT 0 |
180 | #define IPV6_PMTUDISC_WANT 1 | 180 | #define IPV6_PMTUDISC_WANT 1 |
181 | #define IPV6_PMTUDISC_DO 2 | 181 | #define IPV6_PMTUDISC_DO 2 |
182 | #define IPV6_PMTUDISC_PROBE 3 | ||
182 | 183 | ||
183 | /* Flowlabel */ | 184 | /* Flowlabel */ |
184 | #define IPV6_FLOWLABEL_MGR 32 | 185 | #define IPV6_FLOWLABEL_MGR 32 |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34606eff8a05..534650cad3a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
189 | return -EINVAL; | 189 | return -EINVAL; |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline int ip_skb_dst_mtu(struct sk_buff *skb) | ||
193 | { | ||
194 | struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; | ||
195 | |||
196 | return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? | ||
197 | skb->dst->dev->mtu : dst_mtu(skb->dst); | ||
198 | } | ||
199 | |||
192 | static inline int ip_finish_output(struct sk_buff *skb) | 200 | static inline int ip_finish_output(struct sk_buff *skb) |
193 | { | 201 | { |
194 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) | 202 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) |
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) | |||
198 | return dst_output(skb); | 206 | return dst_output(skb); |
199 | } | 207 | } |
200 | #endif | 208 | #endif |
201 | if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) | 209 | if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) |
202 | return ip_fragment(skb, ip_finish_output2); | 210 | return ip_fragment(skb, ip_finish_output2); |
203 | else | 211 | else |
204 | return ip_finish_output2(skb); | 212 | return ip_finish_output2(skb); |
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
422 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 430 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { |
423 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); | 431 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); |
424 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 432 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
425 | htonl(dst_mtu(&rt->u.dst))); | 433 | htonl(ip_skb_dst_mtu(skb))); |
426 | kfree_skb(skb); | 434 | kfree_skb(skb); |
427 | return -EMSGSIZE; | 435 | return -EMSGSIZE; |
428 | } | 436 | } |
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk, | |||
787 | inet->cork.addr = ipc->addr; | 795 | inet->cork.addr = ipc->addr; |
788 | } | 796 | } |
789 | dst_hold(&rt->u.dst); | 797 | dst_hold(&rt->u.dst); |
790 | inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); | 798 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
799 | rt->u.dst.dev->mtu : | ||
800 | dst_mtu(rt->u.dst.path); | ||
791 | inet->cork.rt = rt; | 801 | inet->cork.rt = rt; |
792 | inet->cork.length = 0; | 802 | inet->cork.length = 0; |
793 | sk->sk_sndmsg_page = NULL; | 803 | sk->sk_sndmsg_page = NULL; |
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) | |||
1203 | * to fragment the frame generated here. No matter, what transforms | 1213 | * to fragment the frame generated here. No matter, what transforms |
1204 | * how transforms change size of the packet, it will come out. | 1214 | * how transforms change size of the packet, it will come out. |
1205 | */ | 1215 | */ |
1206 | if (inet->pmtudisc != IP_PMTUDISC_DO) | 1216 | if (inet->pmtudisc < IP_PMTUDISC_DO) |
1207 | skb->local_df = 1; | 1217 | skb->local_df = 1; |
1208 | 1218 | ||
1209 | /* DF bit is set when we want to see DF on outgoing frames. | 1219 | /* DF bit is set when we want to see DF on outgoing frames. |
1210 | * If local_df is set too, we still allow to fragment this frame | 1220 | * If local_df is set too, we still allow to fragment this frame |
1211 | * locally. */ | 1221 | * locally. */ |
1212 | if (inet->pmtudisc == IP_PMTUDISC_DO || | 1222 | if (inet->pmtudisc >= IP_PMTUDISC_DO || |
1213 | (skb->len <= dst_mtu(&rt->u.dst) && | 1223 | (skb->len <= dst_mtu(&rt->u.dst) && |
1214 | ip_dont_fragment(sk, &rt->u.dst))) | 1224 | ip_dont_fragment(sk, &rt->u.dst))) |
1215 | df = htons(IP_DF); | 1225 | df = htons(IP_DF); |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c199d2311731..4d544573f48a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
542 | inet->hdrincl = val ? 1 : 0; | 542 | inet->hdrincl = val ? 1 : 0; |
543 | break; | 543 | break; |
544 | case IP_MTU_DISCOVER: | 544 | case IP_MTU_DISCOVER: |
545 | if (val<0 || val>2) | 545 | if (val<0 || val>3) |
546 | goto e_inval; | 546 | goto e_inval; |
547 | inet->pmtudisc = val; | 547 | inet->pmtudisc = val; |
548 | break; | 548 | break; |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5a5b7d4ad31c..f508171bab73 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb) | |||
137 | return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); | 137 | return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); |
138 | } | 138 | } |
139 | 139 | ||
140 | static inline int ip6_skb_dst_mtu(struct sk_buff *skb) | ||
141 | { | ||
142 | struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; | ||
143 | |||
144 | return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? | ||
145 | skb->dst->dev->mtu : dst_mtu(skb->dst); | ||
146 | } | ||
147 | |||
140 | int ip6_output(struct sk_buff *skb) | 148 | int ip6_output(struct sk_buff *skb) |
141 | { | 149 | { |
142 | if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || | 150 | if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || |
143 | dst_allfrag(skb->dst)) | 151 | dst_allfrag(skb->dst)) |
144 | return ip6_fragment(skb, ip6_output2); | 152 | return ip6_fragment(skb, ip6_output2); |
145 | else | 153 | else |
@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
566 | hlen = ip6_find_1stfragopt(skb, &prevhdr); | 574 | hlen = ip6_find_1stfragopt(skb, &prevhdr); |
567 | nexthdr = *prevhdr; | 575 | nexthdr = *prevhdr; |
568 | 576 | ||
569 | mtu = dst_mtu(&rt->u.dst); | 577 | mtu = ip6_skb_dst_mtu(skb); |
570 | 578 | ||
571 | /* We must not fragment if the socket is set to force MTU discovery | 579 | /* We must not fragment if the socket is set to force MTU discovery |
572 | * or if the skb it not generated by a local socket. (This last | 580 | * or if the skb it not generated by a local socket. (This last |
@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, | |||
1063 | inet->cork.fl = *fl; | 1071 | inet->cork.fl = *fl; |
1064 | np->cork.hop_limit = hlimit; | 1072 | np->cork.hop_limit = hlimit; |
1065 | np->cork.tclass = tclass; | 1073 | np->cork.tclass = tclass; |
1066 | mtu = dst_mtu(rt->u.dst.path); | 1074 | mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? |
1075 | rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); | ||
1067 | if (np->frag_size < mtu) { | 1076 | if (np->frag_size < mtu) { |
1068 | if (np->frag_size) | 1077 | if (np->frag_size) |
1069 | mtu = np->frag_size; | 1078 | mtu = np->frag_size; |
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index da930fa089c9..aa3d07c52a8f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c | |||
@@ -694,7 +694,7 @@ done: | |||
694 | retv = ip6_ra_control(sk, val, NULL); | 694 | retv = ip6_ra_control(sk, val, NULL); |
695 | break; | 695 | break; |
696 | case IPV6_MTU_DISCOVER: | 696 | case IPV6_MTU_DISCOVER: |
697 | if (val<0 || val>2) | 697 | if (val<0 || val>3) |
698 | goto e_inval; | 698 | goto e_inval; |
699 | np->pmtudisc = val; | 699 | np->pmtudisc = val; |
700 | retv = 0; | 700 | retv = 0; |