aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/tcp.h6
-rw-r--r--include/net/sock.h1
-rw-r--r--net/ipv4/tcp_ipv4.c19
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv6/tcp_ipv6.c40
5 files changed, 51 insertions, 21 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2761856987b2..eb125a4c30b3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -493,6 +493,9 @@ struct tcp_sock {
493 u32 probe_seq_start; 493 u32 probe_seq_start;
494 u32 probe_seq_end; 494 u32 probe_seq_end;
495 } mtu_probe; 495 } mtu_probe;
496 u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
497 * while socket was owned by user.
498 */
496 499
497#ifdef CONFIG_TCP_MD5SIG 500#ifdef CONFIG_TCP_MD5SIG
498/* TCP AF-Specific parts; only used by MD5 Signature support so far */ 501/* TCP AF-Specific parts; only used by MD5 Signature support so far */
@@ -518,6 +521,9 @@ enum tsq_flags {
518 TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ 521 TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */
519 TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ 522 TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */
520 TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ 523 TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */
524 TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
525 * tcp_v{4|6}_mtu_reduced()
526 */
521}; 527};
522 528
523static inline struct tcp_sock *tcp_sk(const struct sock *sk) 529static inline struct tcp_sock *tcp_sk(const struct sock *sk)
diff --git a/include/net/sock.h b/include/net/sock.h
index 88de092df50f..e067f8c18f88 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -859,6 +859,7 @@ struct proto {
859 struct sk_buff *skb); 859 struct sk_buff *skb);
860 860
861 void (*release_cb)(struct sock *sk); 861 void (*release_cb)(struct sock *sk);
862 void (*mtu_reduced)(struct sock *sk);
862 863
863 /* Keeping track of sk's, looking them up, and port selection methods. */ 864 /* Keeping track of sk's, looking them up, and port selection methods. */
864 void (*hash)(struct sock *sk); 865 void (*hash)(struct sock *sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 59110caeb074..bc5432e3c778 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -275,12 +275,15 @@ failure:
275EXPORT_SYMBOL(tcp_v4_connect); 275EXPORT_SYMBOL(tcp_v4_connect);
276 276
277/* 277/*
278 * This routine does path mtu discovery as defined in RFC1191. 278 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
279 * It can be called through tcp_release_cb() if socket was owned by user
280 * at the time tcp_v4_err() was called to handle ICMP message.
279 */ 281 */
280static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) 282static void tcp_v4_mtu_reduced(struct sock *sk)
281{ 283{
282 struct dst_entry *dst; 284 struct dst_entry *dst;
283 struct inet_sock *inet = inet_sk(sk); 285 struct inet_sock *inet = inet_sk(sk);
286 u32 mtu = tcp_sk(sk)->mtu_info;
284 287
285 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 288 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
286 * send out by Linux are always <576bytes so they should go through 289 * send out by Linux are always <576bytes so they should go through
@@ -373,8 +376,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
373 bh_lock_sock(sk); 376 bh_lock_sock(sk);
374 /* If too many ICMPs get dropped on busy 377 /* If too many ICMPs get dropped on busy
375 * servers this needs to be solved differently. 378 * servers this needs to be solved differently.
379 * We do take care of PMTU discovery (RFC1191) special case :
380 * we can receive locally generated ICMP messages while socket is held.
376 */ 381 */
377 if (sock_owned_by_user(sk)) 382 if (sock_owned_by_user(sk) &&
383 type != ICMP_DEST_UNREACH &&
384 code != ICMP_FRAG_NEEDED)
378 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 385 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
379 386
380 if (sk->sk_state == TCP_CLOSE) 387 if (sk->sk_state == TCP_CLOSE)
@@ -409,8 +416,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
409 goto out; 416 goto out;
410 417
411 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 418 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
419 tp->mtu_info = info;
412 if (!sock_owned_by_user(sk)) 420 if (!sock_owned_by_user(sk))
413 do_pmtu_discovery(sk, iph, info); 421 tcp_v4_mtu_reduced(sk);
422 else
423 set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
414 goto out; 424 goto out;
415 } 425 }
416 426
@@ -2596,6 +2606,7 @@ struct proto tcp_prot = {
2596 .sendpage = tcp_sendpage, 2606 .sendpage = tcp_sendpage,
2597 .backlog_rcv = tcp_v4_do_rcv, 2607 .backlog_rcv = tcp_v4_do_rcv,
2598 .release_cb = tcp_release_cb, 2608 .release_cb = tcp_release_cb,
2609 .mtu_reduced = tcp_v4_mtu_reduced,
2599 .hash = inet_hash, 2610 .hash = inet_hash,
2600 .unhash = inet_unhash, 2611 .unhash = inet_unhash,
2601 .get_port = inet_csk_get_port, 2612 .get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 950aebfd9967..33cd065cfbd8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -885,7 +885,8 @@ static void tcp_tasklet_func(unsigned long data)
885 885
/* Mask of the tsq_flags bits that record work postponed because the socket
 * was owned by user; tcp_release_cb() tests these bits to run the
 * corresponding deferred handlers.
 */
886#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ 886#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
887 (1UL << TCP_WRITE_TIMER_DEFERRED) | \ 887 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
888 (1UL << TCP_DELACK_TIMER_DEFERRED)) 888 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
889 (1UL << TCP_MTU_REDUCED_DEFERRED))
889/** 890/**
890 * tcp_release_cb - tcp release_sock() callback 891 * tcp_release_cb - tcp release_sock() callback
891 * @sk: socket 892 * @sk: socket
@@ -914,6 +915,9 @@ void tcp_release_cb(struct sock *sk)
914 915
915 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) 916 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
916 tcp_delack_timer_handler(sk); 917 tcp_delack_timer_handler(sk);
918
919 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
920 sk->sk_prot->mtu_reduced(sk);
917} 921}
918EXPORT_SYMBOL(tcp_release_cb); 922EXPORT_SYMBOL(tcp_release_cb);
919 923
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0302ec3fecfc..f49476e2d884 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -315,6 +315,23 @@ failure:
315 return err; 315 return err;
316} 316}
317 317
318static void tcp_v6_mtu_reduced(struct sock *sk)
319{
320 struct dst_entry *dst;
321
322 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
323 return;
324
325 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
326 if (!dst)
327 return;
328
329 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
330 tcp_sync_mss(sk, dst_mtu(dst));
331 tcp_simple_retransmit(sk);
332 }
333}
334
318static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 335static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
319 u8 type, u8 code, int offset, __be32 info) 336 u8 type, u8 code, int offset, __be32 info)
320{ 337{
@@ -342,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
342 } 359 }
343 360
344 bh_lock_sock(sk); 361 bh_lock_sock(sk);
345 if (sock_owned_by_user(sk)) 362 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
346 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 363 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
347 364
348 if (sk->sk_state == TCP_CLOSE) 365 if (sk->sk_state == TCP_CLOSE)
@@ -371,21 +388,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
371 } 388 }
372 389
373 if (type == ICMPV6_PKT_TOOBIG) { 390 if (type == ICMPV6_PKT_TOOBIG) {
374 struct dst_entry *dst; 391 tp->mtu_info = ntohl(info);
375 392 if (!sock_owned_by_user(sk))
376 if (sock_owned_by_user(sk)) 393 tcp_v6_mtu_reduced(sk);
377 goto out; 394 else
378 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 395 set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
379 goto out;
380
381 dst = inet6_csk_update_pmtu(sk, ntohl(info));
382 if (!dst)
383 goto out;
384
385 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
386 tcp_sync_mss(sk, dst_mtu(dst));
387 tcp_simple_retransmit(sk);
388 }
389 goto out; 396 goto out;
390 } 397 }
391 398
@@ -1949,6 +1956,7 @@ struct proto tcpv6_prot = {
1949 .sendpage = tcp_sendpage, 1956 .sendpage = tcp_sendpage,
1950 .backlog_rcv = tcp_v6_do_rcv, 1957 .backlog_rcv = tcp_v6_do_rcv,
1951 .release_cb = tcp_release_cb, 1958 .release_cb = tcp_release_cb,
1959 .mtu_reduced = tcp_v6_mtu_reduced,
1952 .hash = tcp_v6_hash, 1960 .hash = tcp_v6_hash,
1953 .unhash = inet_unhash, 1961 .unhash = inet_unhash,
1954 .get_port = inet_csk_get_port, 1962 .get_port = inet_csk_get_port,