diff options
-rw-r--r-- | include/linux/tcp.h | 6 | ||||
-rw-r--r-- | include/net/sock.h | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 6 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 40 |
5 files changed, 51 insertions, 21 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2761856987b2..eb125a4c30b3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -493,6 +493,9 @@ struct tcp_sock { | |||
493 | u32 probe_seq_start; | 493 | u32 probe_seq_start; |
494 | u32 probe_seq_end; | 494 | u32 probe_seq_end; |
495 | } mtu_probe; | 495 | } mtu_probe; |
496 | u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG | ||
497 | * while socket was owned by user. | ||
498 | */ | ||
496 | 499 | ||
497 | #ifdef CONFIG_TCP_MD5SIG | 500 | #ifdef CONFIG_TCP_MD5SIG |
498 | /* TCP AF-Specific parts; only used by MD5 Signature support so far */ | 501 | /* TCP AF-Specific parts; only used by MD5 Signature support so far */ |
@@ -518,6 +521,9 @@ enum tsq_flags { | |||
518 | TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ | 521 | TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ |
519 | TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ | 522 | TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ |
520 | TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ | 523 | TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ |
524 | TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call | ||
525 | * tcp_v{4|6}_mtu_reduced() | ||
526 | */ | ||
521 | }; | 527 | }; |
522 | 528 | ||
523 | static inline struct tcp_sock *tcp_sk(const struct sock *sk) | 529 | static inline struct tcp_sock *tcp_sk(const struct sock *sk) |
diff --git a/include/net/sock.h b/include/net/sock.h index 88de092df50f..e067f8c18f88 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -859,6 +859,7 @@ struct proto { | |||
859 | struct sk_buff *skb); | 859 | struct sk_buff *skb); |
860 | 860 | ||
861 | void (*release_cb)(struct sock *sk); | 861 | void (*release_cb)(struct sock *sk); |
862 | void (*mtu_reduced)(struct sock *sk); | ||
862 | 863 | ||
863 | /* Keeping track of sk's, looking them up, and port selection methods. */ | 864 | /* Keeping track of sk's, looking them up, and port selection methods. */ |
864 | void (*hash)(struct sock *sk); | 865 | void (*hash)(struct sock *sk); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59110caeb074..bc5432e3c778 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -275,12 +275,15 @@ failure: | |||
275 | EXPORT_SYMBOL(tcp_v4_connect); | 275 | EXPORT_SYMBOL(tcp_v4_connect); |
276 | 276 | ||
277 | /* | 277 | /* |
278 | * This routine does path mtu discovery as defined in RFC1191. | 278 | * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. |
279 | * It can be called through tcp_release_cb() if socket was owned by user | ||
280 | * at the time tcp_v4_err() was called to handle ICMP message. | ||
279 | */ | 281 | */ |
280 | static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) | 282 | static void tcp_v4_mtu_reduced(struct sock *sk) |
281 | { | 283 | { |
282 | struct dst_entry *dst; | 284 | struct dst_entry *dst; |
283 | struct inet_sock *inet = inet_sk(sk); | 285 | struct inet_sock *inet = inet_sk(sk); |
286 | u32 mtu = tcp_sk(sk)->mtu_info; | ||
284 | 287 | ||
285 | /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs | 288 | /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs |
286 | * send out by Linux are always <576bytes so they should go through | 289 | * send out by Linux are always <576bytes so they should go through |
@@ -373,8 +376,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
373 | bh_lock_sock(sk); | 376 | bh_lock_sock(sk); |
374 | /* If too many ICMPs get dropped on busy | 377 | /* If too many ICMPs get dropped on busy |
375 | * servers this needs to be solved differently. | 378 | * servers this needs to be solved differently. |
379 | * We do take care of PMTU discovery (RFC1191) special case : | ||
380 | * we can receive locally generated ICMP messages while socket is held. | ||
376 | */ | 381 | */ |
377 | if (sock_owned_by_user(sk)) | 382 | if (sock_owned_by_user(sk) && |
383 | type != ICMP_DEST_UNREACH && | ||
384 | code != ICMP_FRAG_NEEDED) | ||
378 | NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); | 385 | NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); |
379 | 386 | ||
380 | if (sk->sk_state == TCP_CLOSE) | 387 | if (sk->sk_state == TCP_CLOSE) |
@@ -409,8 +416,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
409 | goto out; | 416 | goto out; |
410 | 417 | ||
411 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ | 418 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ |
419 | tp->mtu_info = info; | ||
412 | if (!sock_owned_by_user(sk)) | 420 | if (!sock_owned_by_user(sk)) |
413 | do_pmtu_discovery(sk, iph, info); | 421 | tcp_v4_mtu_reduced(sk); |
422 | else | ||
423 | set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); | ||
414 | goto out; | 424 | goto out; |
415 | } | 425 | } |
416 | 426 | ||
@@ -2596,6 +2606,7 @@ struct proto tcp_prot = { | |||
2596 | .sendpage = tcp_sendpage, | 2606 | .sendpage = tcp_sendpage, |
2597 | .backlog_rcv = tcp_v4_do_rcv, | 2607 | .backlog_rcv = tcp_v4_do_rcv, |
2598 | .release_cb = tcp_release_cb, | 2608 | .release_cb = tcp_release_cb, |
2609 | .mtu_reduced = tcp_v4_mtu_reduced, | ||
2599 | .hash = inet_hash, | 2610 | .hash = inet_hash, |
2600 | .unhash = inet_unhash, | 2611 | .unhash = inet_unhash, |
2601 | .get_port = inet_csk_get_port, | 2612 | .get_port = inet_csk_get_port, |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 950aebfd9967..33cd065cfbd8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -885,7 +885,8 @@ static void tcp_tasklet_func(unsigned long data) | |||
885 | 885 | ||
886 | #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ | 886 | #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ |
887 | (1UL << TCP_WRITE_TIMER_DEFERRED) | \ | 887 | (1UL << TCP_WRITE_TIMER_DEFERRED) | \ |
888 | (1UL << TCP_DELACK_TIMER_DEFERRED)) | 888 | (1UL << TCP_DELACK_TIMER_DEFERRED) | \ |
889 | (1UL << TCP_MTU_REDUCED_DEFERRED)) | ||
889 | /** | 890 | /** |
890 | * tcp_release_cb - tcp release_sock() callback | 891 | * tcp_release_cb - tcp release_sock() callback |
891 | * @sk: socket | 892 | * @sk: socket |
@@ -914,6 +915,9 @@ void tcp_release_cb(struct sock *sk) | |||
914 | 915 | ||
915 | if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) | 916 | if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) |
916 | tcp_delack_timer_handler(sk); | 917 | tcp_delack_timer_handler(sk); |
918 | |||
919 | if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) | ||
920 | sk->sk_prot->mtu_reduced(sk); | ||
917 | } | 921 | } |
918 | EXPORT_SYMBOL(tcp_release_cb); | 922 | EXPORT_SYMBOL(tcp_release_cb); |
919 | 923 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0302ec3fecfc..f49476e2d884 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -315,6 +315,23 @@ failure: | |||
315 | return err; | 315 | return err; |
316 | } | 316 | } |
317 | 317 | ||
318 | static void tcp_v6_mtu_reduced(struct sock *sk) | ||
319 | { | ||
320 | struct dst_entry *dst; | ||
321 | |||
322 | if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) | ||
323 | return; | ||
324 | |||
325 | dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info); | ||
326 | if (!dst) | ||
327 | return; | ||
328 | |||
329 | if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { | ||
330 | tcp_sync_mss(sk, dst_mtu(dst)); | ||
331 | tcp_simple_retransmit(sk); | ||
332 | } | ||
333 | } | ||
334 | |||
318 | static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 335 | static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
319 | u8 type, u8 code, int offset, __be32 info) | 336 | u8 type, u8 code, int offset, __be32 info) |
320 | { | 337 | { |
@@ -342,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
342 | } | 359 | } |
343 | 360 | ||
344 | bh_lock_sock(sk); | 361 | bh_lock_sock(sk); |
345 | if (sock_owned_by_user(sk)) | 362 | if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) |
346 | NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); | 363 | NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); |
347 | 364 | ||
348 | if (sk->sk_state == TCP_CLOSE) | 365 | if (sk->sk_state == TCP_CLOSE) |
@@ -371,21 +388,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
371 | } | 388 | } |
372 | 389 | ||
373 | if (type == ICMPV6_PKT_TOOBIG) { | 390 | if (type == ICMPV6_PKT_TOOBIG) { |
374 | struct dst_entry *dst; | 391 | tp->mtu_info = ntohl(info); |
375 | 392 | if (!sock_owned_by_user(sk)) | |
376 | if (sock_owned_by_user(sk)) | 393 | tcp_v6_mtu_reduced(sk); |
377 | goto out; | 394 | else |
378 | if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) | 395 | set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); |
379 | goto out; | ||
380 | |||
381 | dst = inet6_csk_update_pmtu(sk, ntohl(info)); | ||
382 | if (!dst) | ||
383 | goto out; | ||
384 | |||
385 | if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { | ||
386 | tcp_sync_mss(sk, dst_mtu(dst)); | ||
387 | tcp_simple_retransmit(sk); | ||
388 | } | ||
389 | goto out; | 396 | goto out; |
390 | } | 397 | } |
391 | 398 | ||
@@ -1949,6 +1956,7 @@ struct proto tcpv6_prot = { | |||
1949 | .sendpage = tcp_sendpage, | 1956 | .sendpage = tcp_sendpage, |
1950 | .backlog_rcv = tcp_v6_do_rcv, | 1957 | .backlog_rcv = tcp_v6_do_rcv, |
1951 | .release_cb = tcp_release_cb, | 1958 | .release_cb = tcp_release_cb, |
1959 | .mtu_reduced = tcp_v6_mtu_reduced, | ||
1952 | .hash = tcp_v6_hash, | 1960 | .hash = tcp_v6_hash, |
1953 | .unhash = inet_unhash, | 1961 | .unhash = inet_unhash, |
1954 | .get_port = inet_csk_get_port, | 1962 | .get_port = inet_csk_get_port, |