diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--  net/ipv4/tcp.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 65 insertions(+), 20 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c4638e6f0238..97c8f5620c43 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
285 | 285 | ||
286 | int sysctl_tcp_min_tso_segs __read_mostly = 2; | 286 | int sysctl_tcp_min_tso_segs __read_mostly = 2; |
287 | 287 | ||
288 | int sysctl_tcp_autocorking __read_mostly = 1; | ||
289 | |||
288 | struct percpu_counter tcp_orphan_count; | 290 | struct percpu_counter tcp_orphan_count; |
289 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 291 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
290 | 292 | ||
@@ -379,7 +381,7 @@ void tcp_init_sock(struct sock *sk)
379 | struct inet_connection_sock *icsk = inet_csk(sk); | 381 | struct inet_connection_sock *icsk = inet_csk(sk); |
380 | struct tcp_sock *tp = tcp_sk(sk); | 382 | struct tcp_sock *tp = tcp_sk(sk); |
381 | 383 | ||
382 | skb_queue_head_init(&tp->out_of_order_queue); | 384 | __skb_queue_head_init(&tp->out_of_order_queue); |
383 | tcp_init_xmit_timers(sk); | 385 | tcp_init_xmit_timers(sk); |
384 | tcp_prequeue_init(tp); | 386 | tcp_prequeue_init(tp); |
385 | INIT_LIST_HEAD(&tp->tsq_node); | 387 | INIT_LIST_HEAD(&tp->tsq_node); |
@@ -619,19 +621,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
619 | tp->snd_up = tp->write_seq; | 621 | tp->snd_up = tp->write_seq; |
620 | } | 622 | } |
621 | 623 | ||
622 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, | 624 | /* If a not yet filled skb is pushed, do not send it if |
623 | int nonagle) | 625 | * we have data packets in Qdisc or NIC queues : |
626 | * Because TX completion will happen shortly, it gives a chance | ||
627 | * to coalesce future sendmsg() payload into this skb, without | ||
628 | * need for a timer, and with no latency trade off. | ||
629 | * As packets containing data payload have a bigger truesize | ||
630 | * than pure acks (dataless) packets, the last checks prevent | ||
631 | * autocorking if we only have an ACK in Qdisc/NIC queues, | ||
632 | * or if TX completion was delayed after we processed ACK packet. | ||
633 | */ | ||
634 | static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, | ||
635 | int size_goal) | ||
624 | { | 636 | { |
625 | if (tcp_send_head(sk)) { | 637 | return skb->len < size_goal && |
626 | struct tcp_sock *tp = tcp_sk(sk); | 638 | sysctl_tcp_autocorking && |
639 | skb != tcp_write_queue_head(sk) && | ||
640 | atomic_read(&sk->sk_wmem_alloc) > skb->truesize; | ||
641 | } | ||
642 | |||
643 | static void tcp_push(struct sock *sk, int flags, int mss_now, | ||
644 | int nonagle, int size_goal) | ||
645 | { | ||
646 | struct tcp_sock *tp = tcp_sk(sk); | ||
647 | struct sk_buff *skb; | ||
627 | 648 | ||
628 | if (!(flags & MSG_MORE) || forced_push(tp)) | 649 | if (!tcp_send_head(sk)) |
629 | tcp_mark_push(tp, tcp_write_queue_tail(sk)); | 650 | return; |
651 | |||
652 | skb = tcp_write_queue_tail(sk); | ||
653 | if (!(flags & MSG_MORE) || forced_push(tp)) | ||
654 | tcp_mark_push(tp, skb); | ||
655 | |||
656 | tcp_mark_urg(tp, flags); | ||
657 | |||
658 | if (tcp_should_autocork(sk, skb, size_goal)) { | ||
630 | 659 | ||
631 | tcp_mark_urg(tp, flags); | 660 | /* avoid atomic op if TSQ_THROTTLED bit is already set */ |
632 | __tcp_push_pending_frames(sk, mss_now, | 661 | if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { |
633 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); | 662 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); |
663 | set_bit(TSQ_THROTTLED, &tp->tsq_flags); | ||
664 | } | ||
665 | /* It is possible TX completion already happened | ||
666 | * before we set TSQ_THROTTLED. | ||
667 | */ | ||
668 | if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) | ||
669 | return; | ||
634 | } | 670 | } |
671 | |||
672 | if (flags & MSG_MORE) | ||
673 | nonagle = TCP_NAGLE_CORK; | ||
674 | |||
675 | __tcp_push_pending_frames(sk, mss_now, nonagle); | ||
635 | } | 676 | } |
636 | 677 | ||
637 | static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, | 678 | static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, |
@@ -934,7 +975,8 @@ new_segment:
934 | wait_for_sndbuf: | 975 | wait_for_sndbuf: |
935 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 976 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
936 | wait_for_memory: | 977 | wait_for_memory: |
937 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 978 | tcp_push(sk, flags & ~MSG_MORE, mss_now, |
979 | TCP_NAGLE_PUSH, size_goal); | ||
938 | 980 | ||
939 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 981 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
940 | goto do_error; | 982 | goto do_error; |
@@ -944,7 +986,7 @@ wait_for_memory:
944 | 986 | ||
945 | out: | 987 | out: |
946 | if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) | 988 | if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) |
947 | tcp_push(sk, flags, mss_now, tp->nonagle); | 989 | tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); |
948 | return copied; | 990 | return copied; |
949 | 991 | ||
950 | do_error: | 992 | do_error: |
@@ -1002,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
1002 | } | 1044 | } |
1003 | } | 1045 | } |
1004 | 1046 | ||
1005 | static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) | 1047 | static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, |
1048 | int *copied, size_t size) | ||
1006 | { | 1049 | { |
1007 | struct tcp_sock *tp = tcp_sk(sk); | 1050 | struct tcp_sock *tp = tcp_sk(sk); |
1008 | int err, flags; | 1051 | int err, flags; |
@@ -1017,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
1017 | if (unlikely(tp->fastopen_req == NULL)) | 1060 | if (unlikely(tp->fastopen_req == NULL)) |
1018 | return -ENOBUFS; | 1061 | return -ENOBUFS; |
1019 | tp->fastopen_req->data = msg; | 1062 | tp->fastopen_req->data = msg; |
1063 | tp->fastopen_req->size = size; | ||
1020 | 1064 | ||
1021 | flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; | 1065 | flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; |
1022 | err = __inet_stream_connect(sk->sk_socket, msg->msg_name, | 1066 | err = __inet_stream_connect(sk->sk_socket, msg->msg_name, |
1023 | msg->msg_namelen, flags); | 1067 | msg->msg_namelen, flags); |
1024 | *size = tp->fastopen_req->copied; | 1068 | *copied = tp->fastopen_req->copied; |
1025 | tcp_free_fastopen_req(tp); | 1069 | tcp_free_fastopen_req(tp); |
1026 | return err; | 1070 | return err; |
1027 | } | 1071 | } |
@@ -1041,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1041 | 1085 | ||
1042 | flags = msg->msg_flags; | 1086 | flags = msg->msg_flags; |
1043 | if (flags & MSG_FASTOPEN) { | 1087 | if (flags & MSG_FASTOPEN) { |
1044 | err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); | 1088 | err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); |
1045 | if (err == -EINPROGRESS && copied_syn > 0) | 1089 | if (err == -EINPROGRESS && copied_syn > 0) |
1046 | goto out; | 1090 | goto out; |
1047 | else if (err) | 1091 | else if (err) |
@@ -1225,7 +1269,8 @@ wait_for_sndbuf:
1225 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1269 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1226 | wait_for_memory: | 1270 | wait_for_memory: |
1227 | if (copied) | 1271 | if (copied) |
1228 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 1272 | tcp_push(sk, flags & ~MSG_MORE, mss_now, |
1273 | TCP_NAGLE_PUSH, size_goal); | ||
1229 | 1274 | ||
1230 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 1275 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
1231 | goto do_error; | 1276 | goto do_error; |
@@ -1236,7 +1281,7 @@ wait_for_memory:
1236 | 1281 | ||
1237 | out: | 1282 | out: |
1238 | if (copied) | 1283 | if (copied) |
1239 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1284 | tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); |
1240 | release_sock(sk); | 1285 | release_sock(sk); |
1241 | return copied + copied_syn; | 1286 | return copied + copied_syn; |
1242 | 1287 | ||
@@ -1623,11 +1668,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1623 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && | 1668 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && |
1624 | !sysctl_tcp_low_latency && | 1669 | !sysctl_tcp_low_latency && |
1625 | net_dma_find_channel()) { | 1670 | net_dma_find_channel()) { |
1626 | preempt_enable_no_resched(); | 1671 | preempt_enable(); |
1627 | tp->ucopy.pinned_list = | 1672 | tp->ucopy.pinned_list = |
1628 | dma_pin_iovec_pages(msg->msg_iov, len); | 1673 | dma_pin_iovec_pages(msg->msg_iov, len); |
1629 | } else { | 1674 | } else { |
1630 | preempt_enable_no_resched(); | 1675 | preempt_enable(); |
1631 | } | 1676 | } |
1632 | } | 1677 | } |
1633 | #endif | 1678 | #endif |
@@ -2186,7 +2231,7 @@ adjudge_to_death:
2186 | /* This is a (useful) BSD violating of the RFC. There is a | 2231 | /* This is a (useful) BSD violating of the RFC. There is a |
2187 | * problem with TCP as specified in that the other end could | 2232 | * problem with TCP as specified in that the other end could |
2188 | * keep a socket open forever with no application left this end. | 2233 | * keep a socket open forever with no application left this end. |
2189 | * We use a 3 minute timeout (about the same as BSD) then kill | 2234 | * We use a 1 minute timeout (about the same as BSD) then kill |
2190 | * our end. If they send after that then tough - BUT: long enough | 2235 | * our end. If they send after that then tough - BUT: long enough |
2191 | * that we won't make the old 4*rto = almost no time - whoops | 2236 | * that we won't make the old 4*rto = almost no time - whoops |
2192 | * reset mistake. | 2237 | * reset mistake. |