about summary refs log tree commit diff stats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- net/ipv4/tcp.c 43
1 files changed, 32 insertions, 11 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 95544e4028c0..6e5617b9f9db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,6 +283,8 @@
283 283
284int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; 284int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
285 285
286int sysctl_tcp_min_tso_segs __read_mostly = 2;
287
286struct percpu_counter tcp_orphan_count; 288struct percpu_counter tcp_orphan_count;
287EXPORT_SYMBOL_GPL(tcp_orphan_count); 289EXPORT_SYMBOL_GPL(tcp_orphan_count);
288 290
@@ -410,10 +412,6 @@ void tcp_init_sock(struct sock *sk)
410 412
411 icsk->icsk_sync_mss = tcp_sync_mss; 413 icsk->icsk_sync_mss = tcp_sync_mss;
412 414
413 /* Presumed zeroed, in order of appearance:
414 * cookie_in_always, cookie_out_never,
415 * s_data_constant, s_data_in, s_data_out
416 */
417 sk->sk_sndbuf = sysctl_tcp_wmem[1]; 415 sk->sk_sndbuf = sysctl_tcp_wmem[1];
418 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 416 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
419 417
@@ -499,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
499 mask |= POLLIN | POLLRDNORM; 497 mask |= POLLIN | POLLRDNORM;
500 498
501 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 499 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
502 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 500 if (sk_stream_is_writeable(sk)) {
503 mask |= POLLOUT | POLLWRNORM; 501 mask |= POLLOUT | POLLWRNORM;
504 } else { /* send SIGIO later */ 502 } else { /* send SIGIO later */
505 set_bit(SOCK_ASYNC_NOSPACE, 503 set_bit(SOCK_ASYNC_NOSPACE,
@@ -510,7 +508,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
510 * wspace test but before the flags are set, 508 * wspace test but before the flags are set,
511 * IO signal will be lost. 509 * IO signal will be lost.
512 */ 510 */
513 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 511 if (sk_stream_is_writeable(sk))
514 mask |= POLLOUT | POLLWRNORM; 512 mask |= POLLOUT | POLLWRNORM;
515 } 513 }
516 } else 514 } else
@@ -789,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
789 xmit_size_goal = mss_now; 787 xmit_size_goal = mss_now;
790 788
791 if (large_allowed && sk_can_gso(sk)) { 789 if (large_allowed && sk_can_gso(sk)) {
792 xmit_size_goal = ((sk->sk_gso_max_size - 1) - 790 u32 gso_size, hlen;
793 inet_csk(sk)->icsk_af_ops->net_header_len - 791
794 inet_csk(sk)->icsk_ext_hdr_len - 792 /* Maybe we should/could use sk->sk_prot->max_header here ? */
795 tp->tcp_header_len); 793 hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
794 inet_csk(sk)->icsk_ext_hdr_len +
795 tp->tcp_header_len;
796
797 /* Goal is to send at least one packet per ms,
798 * not one big TSO packet every 100 ms.
799 * This preserves ACK clocking and is consistent
800 * with tcp_tso_should_defer() heuristic.
801 */
802 gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
803 gso_size = max_t(u32, gso_size,
804 sysctl_tcp_min_tso_segs * mss_now);
796 805
797 /* TSQ : try to have two TSO segments in flight */ 806 xmit_size_goal = min_t(u32, gso_size,
807 sk->sk_gso_max_size - 1 - hlen);
808
809 /* TSQ : try to have at least two segments in flight
810 * (one in NIC TX ring, another in Qdisc)
811 */
798 xmit_size_goal = min_t(u32, xmit_size_goal, 812 xmit_size_goal = min_t(u32, xmit_size_goal,
799 sysctl_tcp_limit_output_bytes >> 1); 813 sysctl_tcp_limit_output_bytes >> 1);
800 814
@@ -2639,6 +2653,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2639 else 2653 else
2640 tp->tsoffset = val - tcp_time_stamp; 2654 tp->tsoffset = val - tcp_time_stamp;
2641 break; 2655 break;
2656 case TCP_NOTSENT_LOWAT:
2657 tp->notsent_lowat = val;
2658 sk->sk_write_space(sk);
2659 break;
2642 default: 2660 default:
2643 err = -ENOPROTOOPT; 2661 err = -ENOPROTOOPT;
2644 break; 2662 break;
@@ -2855,6 +2873,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2855 case TCP_TIMESTAMP: 2873 case TCP_TIMESTAMP:
2856 val = tcp_time_stamp + tp->tsoffset; 2874 val = tcp_time_stamp + tp->tsoffset;
2857 break; 2875 break;
2876 case TCP_NOTSENT_LOWAT:
2877 val = tp->notsent_lowat;
2878 break;
2858 default: 2879 default:
2859 return -ENOPROTOOPT; 2880 return -ENOPROTOOPT;
2860 } 2881 }