Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c | 54 +++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 43 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f18262e2326..a2a796c5536b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
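Note: the two deleted globals are not dropped; they move into per-network-namespace state, and the tcp_mtup_init() hunk below reads them through net->ipv4. A minimal sketch of the counterpart header change outside this file-limited view (field names match the accesses below; the exact struct layout is an assumption):

/* include/net/netns/ipv4.h (sketch) */
struct netns_ipv4 {
        /* existing fields omitted */
        int sysctl_tcp_mtu_probing;     /* was global sysctl_tcp_mtu_probing */
        int sysctl_tcp_base_mss;        /* was global sysctl_tcp_base_mss */
};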
@@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
        skb_orphan(skb);
        skb->sk = sk;
-       skb->destructor = tcp_wfree;
+       skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
        skb_set_hash_from_sk(skb, sk);
        atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
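Note: pure ACKs now skip tcp_wfree() (the TCP Small Queues destructor) in favor of plain sock_wfree(). The predicate comes from companion helpers added to include/net/tcp.h by the same series; roughly (sketch, the sentinel-truesize trick is the key idea):

static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
{
        /* sentinel: no real skb can have a truesize of 2 */
        skb->truesize = 2;
}

static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
{
        return skb->truesize == 2;
}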
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
 
-       icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+       icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
        icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
                               icsk->icsk_af_ops->net_header_len;
-       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
        icsk->icsk_mtup.probe_size = 0;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
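Note: with the knobs per-netns, their defaults have to be established at namespace setup instead of via the deleted initializers. A sketch, assuming the usual tcp_sk_init() path in net/ipv4/tcp_ipv4.c:

static int __net_init tcp_sk_init(struct net *net)
{
        /* defaults formerly carried by the removed globals */
        net->ipv4.sysctl_tcp_mtu_probing = 0;
        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
        return 0;
}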
@@ -2019,7 +2017,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
                        break;
 
-               if (tso_segs == 1) {
+               if (tso_segs == 1 || !max_segs) {
                        if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
                                                     (tcp_skb_is_last(sk, skb) ?
                                                      nonagle : TCP_NAGLE_PUSH))))
@@ -2032,7 +2030,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                }
 
                limit = mss_now;
-               if (tso_segs > 1 && !tcp_urg_mode(tp))
+               if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
                        limit = tcp_mss_split_point(sk, skb, mss_now,
                                                    min_t(unsigned int,
                                                          cwnd_quota,
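Note: both hunks gate the TSO path on max_segs, which is computed earlier in tcp_write_xmit() from the socket's pacing rate (that hunk is not shown in this view); a zero max_segs forces the single-segment Nagle path and disables the mss split-point logic. A sketch of the autosizing helper introduced by the same series (body reconstructed from that era; treat the details as an assumption):

static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
{
        u32 bytes, segs;

        /* target roughly 1 ms worth of payload at the current pacing
         * rate, bounded by what the device accepts in one GSO packet
         */
        bytes = min(sk->sk_pacing_rate >> 10,
                    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
        segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);

        return min_t(u32, segs, sk->sk_gso_max_segs);
}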
@@ -2939,6 +2937,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 }
 EXPORT_SYMBOL(tcp_make_synack);
 
+static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct tcp_congestion_ops *ca;
+       u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+
+       if (ca_key == TCP_CA_UNSPEC)
+               return;
+
+       rcu_read_lock();
+       ca = tcp_ca_find_key(ca_key);
+       if (likely(ca && try_module_get(ca->owner))) {
+               module_put(icsk->icsk_ca_ops->owner);
+               icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+               icsk->icsk_ca_ops = ca;
+       }
+       rcu_read_unlock();
+}
+
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
 {
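Note: tcp_ca_dst_init() implements per-route congestion control: if the route carries an RTAX_CC_ALGO metric, the socket is switched to that algorithm, taking a module reference on the new ops before releasing the old one, under rcu_read_lock() since tcp_ca_find_key() walks RCU-protected state. The locked-bit helper it calls is a thin wrapper (sketch; exact header placement is an assumption):

static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
{
        /* a route may lock its choice so setsockopt(TCP_CONGESTION)
         * cannot override it later
         */
        return dst_metric_locked(dst, RTAX_CC_ALGO);
}

With matching iproute2 support this would be driven by something like `ip route add 10.0.0.0/24 dev eth0 congctl dctcp` (hypothetical example route).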
@@ -2964,6 +2981,8 @@ static void tcp_connect_init(struct sock *sk)
        tcp_mtup_init(sk);
        tcp_sync_mss(sk, dst_mtu(dst));
 
+       tcp_ca_dst_init(sk, dst);
+
        if (!tp->window_clamp)
                tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
        tp->advmss = dst_metric_advmss(dst);
@@ -3034,7 +3053,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_fastopen_request *fo = tp->fastopen_req;
-       int syn_loss = 0, space, err = 0;
+       int syn_loss = 0, space, err = 0, copied;
        unsigned long last_syn_loss = 0;
        struct sk_buff *syn_data;
 
@@ -3072,11 +3091,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
                goto fallback;
        syn_data->ip_summed = CHECKSUM_PARTIAL;
        memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-       if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-                                        fo->data->msg_iter.iov, 0, space))) {
+       copied = copy_from_iter(skb_put(syn_data, space), space,
+                               &fo->data->msg_iter);
+       if (unlikely(!copied)) {
                kfree_skb(syn_data);
                goto fallback;
        }
+       if (copied != space) {
+               skb_trim(syn_data, copied);
+               space = copied;
+       }
 
        /* No more data pending in inet_wait_for_connect() */
        if (space == fo->size)
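Note: the conversion changes error semantics. memcpy_fromiovecend() was all-or-nothing (nonzero return on fault), while copy_from_iter() returns the number of bytes actually copied and advances the iterator, so a short copy is legitimate: only a zero return aborts to the fallback, and a partial copy trims the skb and shrinks space to match. The general conversion pattern, as a sketch using the same primitives:

copied = copy_from_iter(skb_put(skb, len), len, &msg->msg_iter);
if (!copied)
        goto fail;              /* faulted before copying anything */
if (copied != len)
        skb_trim(skb, copied);  /* keep only what actually arrived */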
@@ -3244,6 +3268,14 @@ void tcp_send_ack(struct sock *sk)
        skb_reserve(buff, MAX_TCP_HEADER);
        tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
+       /* We do not want pure acks influencing TCP Small Queues or fq/pacing
+        * too much.
+        * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+        * We also avoid tcp_wfree() overhead (cache line miss accessing
+        * tp->tsq_flags) by using regular sock_wfree()
+        */
+       skb_set_tcp_pure_ack(buff);
+
        /* Send it off, this clears delayed acks for us. */
        skb_mstamp_get(&buff->skb_mstamp);
        tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
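Note: the ~784 figure in the added comment follows from skb truesize accounting, where even a minimal ACK is charged the aligned sizes of struct sk_buff and struct skb_shared_info on top of its headers, per the macro the comment references (from include/linux/skbuff.h):

#define SKB_TRUESIZE(X) ((X) +                                          \
                         SKB_DATA_ALIGN(sizeof(struct sk_buff)) +      \
                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

Since that overhead would count against the TCP Small Queues limit and fq/pacing just like real payload, pure ACKs are tagged here so the tcp_transmit_skb() hunk above can route them through sock_wfree() instead of tcp_wfree().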