aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2012-04-24 03:37:38 -0400
committer David S. Miller <davem@davemloft.net> 2012-04-27 00:03:34 -0400
commit 67469601406c12ced3db9956aeb0ef0854e2952f (patch)
tree a25902d49946c394ebf110265179da2dc5b7d827 /net/ipv4
parent a85c9bb895aed633615078f69f4a4bce9e39be5f (diff)
ipv6: RTAX_FEATURE_ALLFRAG causes inefficient TCP segment sizing
Quoting Tore Anderson from https://bugzilla.kernel.org/show_bug.cgi?id=42572: When RTAX_FEATURE_ALLFRAG is set on a route, the effective TCP segment size does not take into account the size of the IPv6 Fragmentation header that needs to be included in outbound packets, causing every transmitted TCP segment to be fragmented across two IPv6 packets, the latter of which will only contain 8 bytes of actual payload. RTAX_FEATURE_ALLFRAG is typically set on a route in response to receiving an ICMPv6 Packet Too Big message indicating a Path MTU of less than 1280 bytes. 1280 bytes is the minimum IPv6 MTU; however, ICMPv6 PTBs with MTU < 1280 are still valid, in particular when an IPv6 packet is sent to an IPv4 destination through a stateless translator. Any ICMPv4 Need To Fragment packets originated from the IPv4 part of the path will be translated to ICMPv6 PTBs, which may then indicate an MTU of less than 1280. The Linux kernel refuses to reduce the effective MTU to anything below 1280 bytes; instead, it sets it to exactly 1280 bytes, and RTAX_FEATURE_ALLFRAG is also set. However, the TCP segment size appears to be set to 1240 bytes (1280 Path MTU - 40 bytes of IPv6 header), instead of 1232 (additionally taking into account the 8 bytes required by the IPv6 Fragmentation extension header). This in turn results in rather inefficient transmission, as every transmitted TCP segment is now split into two fragments containing 1232+8 bytes of payload. After this patch, all outgoing packets that include a Fragmentation header are "atomic" or "non-fragmented" fragments, i.e., they have both Offset=0 and More Fragments=0. With help from David S. Miller. Reported-by: Tore Anderson <tore@fud.no> Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Maciej Żenczykowski <maze@google.com> Cc: Tom Herbert <therbert@google.com> Tested-by: Tore Anderson <tore@fud.no> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/tcp_output.c | 19
1 files changed, 17 insertions, 2 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7b7cf3811348..834e89fc541b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1150,7 +1150,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1150} 1150}
1151 1151
1152/* Calculate MSS. Not accounting for SACKs here. */ 1152/* Calculate MSS. Not accounting for SACKs here. */
1153int tcp_mtu_to_mss(const struct sock *sk, int pmtu) 1153int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1154{ 1154{
1155 const struct tcp_sock *tp = tcp_sk(sk); 1155 const struct tcp_sock *tp = tcp_sk(sk);
1156 const struct inet_connection_sock *icsk = inet_csk(sk); 1156 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1161,6 +1161,14 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
1161 */ 1161 */
1162 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); 1162 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1163 1163
1164 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1165 if (icsk->icsk_af_ops->net_frag_header_len) {
1166 const struct dst_entry *dst = __sk_dst_get(sk);
1167
1168 if (dst && dst_allfrag(dst))
1169 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1170 }
1171
1164 /* Clamp it (mss_clamp does not include tcp options) */ 1172 /* Clamp it (mss_clamp does not include tcp options) */
1165 if (mss_now > tp->rx_opt.mss_clamp) 1173 if (mss_now > tp->rx_opt.mss_clamp)
1166 mss_now = tp->rx_opt.mss_clamp; 1174 mss_now = tp->rx_opt.mss_clamp;
@@ -1179,7 +1187,7 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
1179} 1187}
1180 1188
1181/* Inverse of above */ 1189/* Inverse of above */
1182int tcp_mss_to_mtu(const struct sock *sk, int mss) 1190int tcp_mss_to_mtu(struct sock *sk, int mss)
1183{ 1191{
1184 const struct tcp_sock *tp = tcp_sk(sk); 1192 const struct tcp_sock *tp = tcp_sk(sk);
1185 const struct inet_connection_sock *icsk = inet_csk(sk); 1193 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1190,6 +1198,13 @@ int tcp_mss_to_mtu(const struct sock *sk, int mss)
1190 icsk->icsk_ext_hdr_len + 1198 icsk->icsk_ext_hdr_len +
1191 icsk->icsk_af_ops->net_header_len; 1199 icsk->icsk_af_ops->net_header_len;
1192 1200
1201 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1202 if (icsk->icsk_af_ops->net_frag_header_len) {
1203 const struct dst_entry *dst = __sk_dst_get(sk);
1204
1205 if (dst && dst_allfrag(dst))
1206 mtu += icsk->icsk_af_ops->net_frag_header_len;
1207 }
1193 return mtu; 1208 return mtu;
1194} 1209}
1195 1210