Diffstat (limited to 'net/ipv4/tcp.c')
 -rw-r--r--  net/ipv4/tcp.c | 85
 1 file changed, 65 insertions(+), 20 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c4638e6f0238..97c8f5620c43 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 int sysctl_tcp_min_tso_segs __read_mostly = 2;
 
+int sysctl_tcp_autocorking __read_mostly = 1;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
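
The knob added above defaults to enabled (1). On a kernel carrying this patch it is reachable as net.ipv4.tcp_autocorking; the sketch below is a minimal user-space way to turn it off at runtime, assuming the usual /proc/sys layout for ipv4 sysctls (equivalent to running sysctl -w net.ipv4.tcp_autocorking=0 as root):

	/* Illustration only: toggle TCP autocorking via procfs.
	 * Assumes a kernel with this patch and root privileges.
	 */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/net/ipv4/tcp_autocorking", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("0\n", f);	/* 0 = off, 1 = on (the default) */
		return fclose(f) ? 1 : 0;
	}
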
@@ -379,7 +381,7 @@ void tcp_init_sock(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	skb_queue_head_init(&tp->out_of_order_queue);
+	__skb_queue_head_init(&tp->out_of_order_queue);
 	tcp_init_xmit_timers(sk);
 	tcp_prequeue_init(tp);
 	INIT_LIST_HEAD(&tp->tsq_node);
@@ -619,19 +621,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
 		tp->snd_up = tp->write_seq;
 }
 
-static inline void tcp_push(struct sock *sk, int flags, int mss_now,
-			    int nonagle)
+/* If a not yet filled skb is pushed, do not send it if
+ * we have data packets in Qdisc or NIC queues :
+ * Because TX completion will happen shortly, it gives a chance
+ * to coalesce future sendmsg() payload into this skb, without
+ * need for a timer, and with no latency trade off.
+ * As packets containing data payload have a bigger truesize
+ * than pure acks (dataless) packets, the last checks prevent
+ * autocorking if we only have an ACK in Qdisc/NIC queues,
+ * or if TX completion was delayed after we processed ACK packet.
+ */
+static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+				int size_goal)
 {
-	if (tcp_send_head(sk)) {
-		struct tcp_sock *tp = tcp_sk(sk);
+	return skb->len < size_goal &&
+	       sysctl_tcp_autocorking &&
+	       skb != tcp_write_queue_head(sk) &&
+	       atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+}
+
+static void tcp_push(struct sock *sk, int flags, int mss_now,
+		     int nonagle, int size_goal)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
 
-		if (!(flags & MSG_MORE) || forced_push(tp))
-			tcp_mark_push(tp, tcp_write_queue_tail(sk));
+	if (!tcp_send_head(sk))
+		return;
+
+	skb = tcp_write_queue_tail(sk);
+	if (!(flags & MSG_MORE) || forced_push(tp))
+		tcp_mark_push(tp, skb);
+
+	tcp_mark_urg(tp, flags);
+
+	if (tcp_should_autocork(sk, skb, size_goal)) {
 
-		tcp_mark_urg(tp, flags);
-		__tcp_push_pending_frames(sk, mss_now,
-					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
+		/* avoid atomic op if TSQ_THROTTLED bit is already set */
+		if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
+			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+		}
+		/* It is possible TX completion already happened
+		 * before we set TSQ_THROTTLED.
+		 */
+		if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
+			return;
 	}
+
+	if (flags & MSG_MORE)
+		nonagle = TCP_NAGLE_CORK;
+
+	__tcp_push_pending_frames(sk, mss_now, nonagle);
 }
 
 static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
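
tcp_should_autocork() automates what applications could previously only get with explicit corking: hold back a not-yet-full skb while earlier data packets still sit in the Qdisc or NIC queues, then let TX completion push the coalesced payload. Applications that do know their message boundaries can still take manual control; below is a hedged user-space sketch of that classic TCP_CORK pattern (the connected socket fd is assumed and return values go unchecked for brevity):

	/* Manual corking: roughly what tcp_should_autocork() now does
	 * automatically for back-to-back small writes. Assumes fd is a
	 * connected TCP socket.
	 */
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	void send_as_one_segment(int fd, const void *hdr, size_t hlen,
				 const void *body, size_t blen)
	{
		int on = 1, off = 0;

		setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
		send(fd, hdr, hlen, 0);		/* queued, not pushed */
		send(fd, body, blen, 0);	/* coalesced with hdr */
		/* uncork: push the merged segment now */
		setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
	}
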
@@ -934,7 +975,8 @@ new_segment:
 wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-		tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+		tcp_push(sk, flags & ~MSG_MORE, mss_now,
+			 TCP_NAGLE_PUSH, size_goal);
 
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -944,7 +986,7 @@ wait_for_memory:
 
 out:
 	if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
-		tcp_push(sk, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	return copied;
 
 do_error:
@@ -1002,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
 	}
 }
 
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+				int *copied, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int err, flags;
@@ -1017,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
 	if (unlikely(tp->fastopen_req == NULL))
 		return -ENOBUFS;
 	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = size;
 
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
 	err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
 				    msg->msg_namelen, flags);
-	*size = tp->fastopen_req->copied;
+	*copied = tp->fastopen_req->copied;
 	tcp_free_fastopen_req(tp);
 	return err;
 }
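
With size now stored in tp->fastopen_req before __inet_stream_connect() runs, the connect path knows how much payload the caller wants to place in the SYN. User space reaches tcp_sendmsg_fastopen() through sendto() with MSG_FASTOPEN; here is a minimal client sketch, assuming the kernel has Fast Open client support enabled (net.ipv4.tcp_fastopen bit 0) and leaving error handling aside:

	/* TCP Fast Open client: sendto() with MSG_FASTOPEN performs the
	 * connect and queues data for the SYN in one call. Sketch only:
	 * a real client would keep fd for the rest of the connection.
	 */
	#include <netinet/in.h>
	#include <sys/socket.h>
	#include <sys/types.h>

	#ifndef MSG_FASTOPEN
	#define MSG_FASTOPEN 0x20000000	/* for older libc headers */
	#endif

	ssize_t tfo_send(const struct sockaddr_in *dst,
			 const char *buf, size_t len)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		if (fd < 0)
			return -1;
		/* No separate connect(): the SYN carries the payload
		 * (or, on first contact, requests a TFO cookie).
		 */
		return sendto(fd, buf, len, MSG_FASTOPEN,
			      (const struct sockaddr *)dst, sizeof(*dst));
	}
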
@@ -1041,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	flags = msg->msg_flags;
 	if (flags & MSG_FASTOPEN) {
-		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
 		else if (err)
@@ -1225,7 +1269,8 @@ wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 		if (copied)
-			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+			tcp_push(sk, flags & ~MSG_MORE, mss_now,
+				 TCP_NAGLE_PUSH, size_goal);
 
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -1236,7 +1281,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	release_sock(sk);
 	return copied + copied_syn;
 
@@ -1623,11 +1668,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		    (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
 		    !sysctl_tcp_low_latency &&
 		    net_dma_find_channel()) {
-			preempt_enable_no_resched();
+			preempt_enable();
 			tp->ucopy.pinned_list =
 					dma_pin_iovec_pages(msg->msg_iov, len);
 		} else {
-			preempt_enable_no_resched();
+			preempt_enable();
 		}
 	}
 #endif
@@ -2186,7 +2231,7 @@ adjudge_to_death:
 	/* This is a (useful) BSD violating of the RFC. There is a
 	 * problem with TCP as specified in that the other end could
 	 * keep a socket open forever with no application left this end.
-	 * We use a 3 minute timeout (about the same as BSD) then kill
+	 * We use a 1 minute timeout (about the same as BSD) then kill
 	 * our end. If they send after that then tough - BUT: long enough
 	 * that we won't make the old 4*rto = almost no time - whoops
 	 * reset mistake.