aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: John Heffner <jheffner@psc.edu> 2007-04-20 18:53:27 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2007-04-26 01:29:10 -0400
commit: 628a5c561890a9a9a74dea017873530584aab06e (patch)
tree: f10edc4078c3f19487bbe3a902ecadda89273361
parent: b881ef7603230550aa0150b22af94089f07ab00d (diff)
[INET]: Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/in.h 1
-rw-r--r-- include/linux/in6.h 1
-rw-r--r-- net/ipv4/ip_output.c 20
-rw-r--r-- net/ipv4/ip_sockglue.c 2
-rw-r--r-- net/ipv6/ip6_output.c 15
-rw-r--r-- net/ipv6/ipv6_sockglue.c 2
6 files changed, 31 insertions, 10 deletions
diff --git a/include/linux/in.h b/include/linux/in.h
index 1912e7c0bc26..3975cbf52f20 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,6 +83,7 @@ struct in_addr {
83#define IP_PMTUDISC_DONT 0 /* Never send DF frames */ 83#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
84#define IP_PMTUDISC_WANT 1 /* Use per route hints */ 84#define IP_PMTUDISC_WANT 1 /* Use per route hints */
85#define IP_PMTUDISC_DO 2 /* Always DF */ 85#define IP_PMTUDISC_DO 2 /* Always DF */
86#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
86 87
87#define IP_MULTICAST_IF 32 88#define IP_MULTICAST_IF 32
88#define IP_MULTICAST_TTL 33 89#define IP_MULTICAST_TTL 33
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 4e8350ae8869..d559fac4a26d 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -179,6 +179,7 @@ struct in6_flowlabel_req
179#define IPV6_PMTUDISC_DONT 0 179#define IPV6_PMTUDISC_DONT 0
180#define IPV6_PMTUDISC_WANT 1 180#define IPV6_PMTUDISC_WANT 1
181#define IPV6_PMTUDISC_DO 2 181#define IPV6_PMTUDISC_DO 2
182#define IPV6_PMTUDISC_PROBE 3
182 183
183/* Flowlabel */ 184/* Flowlabel */
184#define IPV6_FLOWLABEL_MGR 32 185#define IPV6_FLOWLABEL_MGR 32
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 34606eff8a05..534650cad3a8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
189 return -EINVAL; 189 return -EINVAL;
190} 190}
191 191
192static inline int ip_skb_dst_mtu(struct sk_buff *skb)
193{
194 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
195
196 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
197 skb->dst->dev->mtu : dst_mtu(skb->dst);
198}
199
192static inline int ip_finish_output(struct sk_buff *skb) 200static inline int ip_finish_output(struct sk_buff *skb)
193{ 201{
194#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 202#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
198 return dst_output(skb); 206 return dst_output(skb);
199 } 207 }
200#endif 208#endif
201 if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) 209 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
202 return ip_fragment(skb, ip_finish_output2); 210 return ip_fragment(skb, ip_finish_output2);
203 else 211 else
204 return ip_finish_output2(skb); 212 return ip_finish_output2(skb);
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
422 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { 430 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
423 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); 431 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
424 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 432 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
425 htonl(dst_mtu(&rt->u.dst))); 433 htonl(ip_skb_dst_mtu(skb)));
426 kfree_skb(skb); 434 kfree_skb(skb);
427 return -EMSGSIZE; 435 return -EMSGSIZE;
428 } 436 }
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
787 inet->cork.addr = ipc->addr; 795 inet->cork.addr = ipc->addr;
788 } 796 }
789 dst_hold(&rt->u.dst); 797 dst_hold(&rt->u.dst);
790 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); 798 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
799 rt->u.dst.dev->mtu :
800 dst_mtu(rt->u.dst.path);
791 inet->cork.rt = rt; 801 inet->cork.rt = rt;
792 inet->cork.length = 0; 802 inet->cork.length = 0;
793 sk->sk_sndmsg_page = NULL; 803 sk->sk_sndmsg_page = NULL;
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
1203 * to fragment the frame generated here. No matter, what transforms 1213 * to fragment the frame generated here. No matter, what transforms
1204 * how transforms change size of the packet, it will come out. 1214 * how transforms change size of the packet, it will come out.
1205 */ 1215 */
1206 if (inet->pmtudisc != IP_PMTUDISC_DO) 1216 if (inet->pmtudisc < IP_PMTUDISC_DO)
1207 skb->local_df = 1; 1217 skb->local_df = 1;
1208 1218
1209 /* DF bit is set when we want to see DF on outgoing frames. 1219 /* DF bit is set when we want to see DF on outgoing frames.
1210 * If local_df is set too, we still allow to fragment this frame 1220 * If local_df is set too, we still allow to fragment this frame
1211 * locally. */ 1221 * locally. */
1212 if (inet->pmtudisc == IP_PMTUDISC_DO || 1222 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1213 (skb->len <= dst_mtu(&rt->u.dst) && 1223 (skb->len <= dst_mtu(&rt->u.dst) &&
1214 ip_dont_fragment(sk, &rt->u.dst))) 1224 ip_dont_fragment(sk, &rt->u.dst)))
1215 df = htons(IP_DF); 1225 df = htons(IP_DF);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c199d2311731..4d544573f48a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
542 inet->hdrincl = val ? 1 : 0; 542 inet->hdrincl = val ? 1 : 0;
543 break; 543 break;
544 case IP_MTU_DISCOVER: 544 case IP_MTU_DISCOVER:
545 if (val<0 || val>2) 545 if (val<0 || val>3)
546 goto e_inval; 546 goto e_inval;
547 inet->pmtudisc = val; 547 inet->pmtudisc = val;
548 break; 548 break;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5a5b7d4ad31c..f508171bab73 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); 137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138} 138}
139 139
140static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
141{
142 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
143
144 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
145 skb->dst->dev->mtu : dst_mtu(skb->dst);
146}
147
140int ip6_output(struct sk_buff *skb) 148int ip6_output(struct sk_buff *skb)
141{ 149{
142 if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || 150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
143 dst_allfrag(skb->dst)) 151 dst_allfrag(skb->dst))
144 return ip6_fragment(skb, ip6_output2); 152 return ip6_fragment(skb, ip6_output2);
145 else 153 else
@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
566 hlen = ip6_find_1stfragopt(skb, &prevhdr); 574 hlen = ip6_find_1stfragopt(skb, &prevhdr);
567 nexthdr = *prevhdr; 575 nexthdr = *prevhdr;
568 576
569 mtu = dst_mtu(&rt->u.dst); 577 mtu = ip6_skb_dst_mtu(skb);
570 578
571 /* We must not fragment if the socket is set to force MTU discovery 579 /* We must not fragment if the socket is set to force MTU discovery
572 * or if the skb it not generated by a local socket. (This last 580 * or if the skb it not generated by a local socket. (This last
@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1063 inet->cork.fl = *fl; 1071 inet->cork.fl = *fl;
1064 np->cork.hop_limit = hlimit; 1072 np->cork.hop_limit = hlimit;
1065 np->cork.tclass = tclass; 1073 np->cork.tclass = tclass;
1066 mtu = dst_mtu(rt->u.dst.path); 1074 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1075 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1067 if (np->frag_size < mtu) { 1076 if (np->frag_size < mtu) {
1068 if (np->frag_size) 1077 if (np->frag_size)
1069 mtu = np->frag_size; 1078 mtu = np->frag_size;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index da930fa089c9..aa3d07c52a8f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -694,7 +694,7 @@ done:
694 retv = ip6_ra_control(sk, val, NULL); 694 retv = ip6_ra_control(sk, val, NULL);
695 break; 695 break;
696 case IPV6_MTU_DISCOVER: 696 case IPV6_MTU_DISCOVER:
697 if (val<0 || val>2) 697 if (val<0 || val>3)
698 goto e_inval; 698 goto e_inval;
699 np->pmtudisc = val; 699 np->pmtudisc = val;
700 retv = 0; 700 retv = 0;