author    Eric Dumazet <edumazet@google.com>      2012-07-23 03:48:52 -0400
committer David S. Miller <davem@davemloft.net>   2012-07-23 03:58:46 -0400
commit    563d34d05786263893ba4a1042eb9b9374127cf5 (patch)
tree      e9ce502c1f32bea966c81d5597d0a29eb4b9d244 /net/ipv6/tcp_ipv6.c
parent    c3def943c7117d42caaed3478731ea7c3c87190e (diff)
tcp: don't drop MTU reduction indications
ICMP messages generated in the output path when the frame length is bigger
than the MTU are currently lost, because the socket is owned by the user
(doing the xmit).

One example is ipgre_tunnel_xmit() calling
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));

We fixed a similar case in commit a34a101e1e6 (ipv6: disable GSO on
sockets hitting dst_allfrag). The problem with that fix is that it relied
on the retransmit timers, so short TCP sessions paid too big a latency
price.

This patch uses the tcp_release_cb() infrastructure so that MTU reduction
messages (ICMP messages) are not lost, and no extra delay is added to TCP
transmits.

Reported-by: Maciej Żenczykowski <maze@google.com>
Diagnosed-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Tore Anderson <tore@fud.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
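For context: tcp_release_cb() runs when the user releases the socket lock
and replays work that was deferred while the socket was owned. Below is a
minimal sketch of the release-side handling that consumes the bit set in
this patch, using only the names visible here (tsq_flags,
TCP_MTU_REDUCED_DEFERRED, the new .mtu_reduced protocol hook); the in-tree
tcp_release_cb() in net/ipv4/tcp_output.c also dispatches other deferred
events, so its exact flag handling differs.

    /* Sketch only: reduced to the MTU case added by this patch. */
    void tcp_release_cb(struct sock *sk)
    {
    	struct tcp_sock *tp = tcp_sk(sk);

    	/* Consume the bit set in tcp_v6_err() while the socket was
    	 * owned by the user, so the PMTU update runs exactly once,
    	 * with the lock held, instead of being dropped.
    	 */
    	if (test_and_clear_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
    		sk->sk_prot->mtu_reduced(sk); /* tcp_v6_mtu_reduced() here */
    }

Deferring to lock-release time means the PMTU update always runs with the
socket lock held, yet without waiting for a retransmit timer, which is what
removes the latency penalty mentioned above.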
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r--  net/ipv6/tcp_ipv6.c | 40
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0302ec3fecfc..f49476e2d884 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -315,6 +315,23 @@ failure:
 	return err;
 }
 
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+	struct dst_entry *dst;
+
+	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+		return;
+
+	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+	if (!dst)
+		return;
+
+	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+		tcp_sync_mss(sk, dst_mtu(dst));
+		tcp_simple_retransmit(sk);
+	}
+}
+
 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		u8 type, u8 code, int offset, __be32 info)
 {
@@ -342,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}
 
 	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk))
+	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == TCP_CLOSE)
@@ -371,21 +388,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
-		struct dst_entry *dst;
-
-		if (sock_owned_by_user(sk))
-			goto out;
-		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
-			goto out;
-
-		dst = inet6_csk_update_pmtu(sk, ntohl(info));
-		if (!dst)
-			goto out;
-
-		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
-			tcp_sync_mss(sk, dst_mtu(dst));
-			tcp_simple_retransmit(sk);
-		}
+		tp->mtu_info = ntohl(info);
+		if (!sock_owned_by_user(sk))
+			tcp_v6_mtu_reduced(sk);
+		else
+			set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
 		goto out;
 	}
 
@@ -1949,6 +1956,7 @@ struct proto tcpv6_prot = {
 	.sendpage	= tcp_sendpage,
 	.backlog_rcv	= tcp_v6_do_rcv,
 	.release_cb	= tcp_release_cb,
+	.mtu_reduced	= tcp_v6_mtu_reduced,
 	.hash		= tcp_v6_hash,
 	.unhash		= inet_unhash,
 	.get_port	= inet_csk_get_port,