Diffstat (limited to 'net/ipv4/tcp_output.c')

 net/ipv4/tcp_output.c | 54 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f18262e2326..a2a796c5536b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
@@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = tcp_wfree;
+	skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
 	skb_set_hash_from_sk(skb, sk);
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
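
Note: skb_is_tcp_pure_ack() is introduced by this patch's companion hunk in include/net/tcp.h, which is not part of this file's diff. A minimal sketch of the pair, assuming the usual definition: a pure ACK is marked by giving it a truesize that no legitimately allocated skb can carry.

	/* Sketch of the assumed companion include/net/tcp.h helpers:
	 * every real skb has truesize >= SKB_TRUESIZE(0), so the value 2
	 * cannot collide with a genuine accounting value.
	 */
	static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
	{
		skb->truesize = 2;
	}

	static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
	{
		return skb->truesize == 2;
	}

With that marker in place, pure ACKs take the cheaper sock_wfree() destructor and skip the TCP Small Queues bookkeeping (and the tp->tsq_flags cache line) that tcp_wfree() touches.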
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct net *net = sock_net(sk);
 
-	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+	icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
 	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
 			       icsk->icsk_af_ops->net_header_len;
-	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
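
Note: together with the deletion of the two globals in the first hunk, this makes MTU probing configurable per network namespace via net->ipv4. The companion change lives outside this file; a sketch under the assumption that it follows the standard per-netns sysctl pattern:

	/* include/net/netns/ipv4.h (assumed companion hunk): the two knobs
	 * become members of the per-namespace ipv4 state.
	 */
	struct netns_ipv4 {
		/* ... existing fields ... */
		int sysctl_tcp_mtu_probing;
		int sysctl_tcp_base_mss;
	};

	/* net/ipv4/sysctl_net_ipv4.c (assumed): the entries move into the
	 * per-netns ipv4_net_table[] so each namespace gets its own copy;
	 * .data is rebased per namespace at registration time.
	 */
	{
		.procname	= "tcp_mtu_probing",
		.data		= &init_net.ipv4.sysctl_tcp_mtu_probing,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},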
@@ -2019,7 +2017,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
 
-		if (tso_segs == 1) {
+		if (tso_segs == 1 || !max_segs) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
 						     (tcp_skb_is_last(sk, skb) ?
 						      nonagle : TCP_NAGLE_PUSH))))
@@ -2032,7 +2030,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
+		if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
 						    min_t(unsigned int,
 							  cwnd_quota,
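
Note (covers this hunk and the previous one): max_segs is new state in tcp_write_xmit(), computed once per call from the socket's pacing rate. Both guards route a zero TSO budget onto the single-segment path, where the Nagle test still applies, instead of letting tcp_mss_split_point() operate on an empty budget. The producer is roughly the following, sketched from the same patch series (shape and names assumed, not verified against this exact tree):

	static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
	{
		u32 bytes, segs;

		/* Budget about 1ms worth of bytes at the current pacing
		 * rate, so one oversized TSO burst cannot monopolize the
		 * link and starve ACK clocking.
		 */
		bytes = min(sk->sk_pacing_rate >> 10,
			    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
		segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
		return min_t(u32, segs, sk->sk_gso_max_segs);
	}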
@@ -2939,6 +2937,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 }
 EXPORT_SYMBOL(tcp_make_synack);
 
+static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_congestion_ops *ca;
+	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+
+	if (ca_key == TCP_CA_UNSPEC)
+		return;
+
+	rcu_read_lock();
+	ca = tcp_ca_find_key(ca_key);
+	if (likely(ca && try_module_get(ca->owner))) {
+		module_put(icsk->icsk_ca_ops->owner);
+		icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+		icsk->icsk_ca_ops = ca;
+	}
+	rcu_read_unlock();
+}
+
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
 {
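
Note: tcp_ca_dst_init() lets a route supply the congestion control algorithm as a dst metric (RTAX_CC_ALGO), stored as a hash key of the algorithm name rather than a pointer, so it stays valid across module load/unload. tcp_ca_find_key() and tcp_ca_dst_locked() come from companion changes in tcp_cong.c and include/net/tcp.h; the latter is presumably a plain metric-lock test:

	/* Assumed shape of the helper: when RTAX_CC_ALGO is locked on the
	 * route, the connection must not switch algorithms later on.
	 */
	static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
	{
		return dst_metric_locked(dst, RTAX_CC_ALGO);
	}

On the admin side this would be exercised with something like `ip route add 10.0.0.0/24 dev eth0 congctl lock dctcp` (iproute2 syntax assumed).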
@@ -2964,6 +2981,8 @@ static void tcp_connect_init(struct sock *sk)
 	tcp_mtup_init(sk);
 	tcp_sync_mss(sk, dst_mtu(dst));
 
+	tcp_ca_dst_init(sk, dst);
+
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric_advmss(dst);
@@ -3034,7 +3053,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0;
+	int syn_loss = 0, space, err = 0, copied;
 	unsigned long last_syn_loss = 0;
 	struct sk_buff *syn_data;
 
@@ -3072,11 +3091,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-					 fo->data->msg_iter.iov, 0, space))) {
+	copied = copy_from_iter(skb_put(syn_data, space), space,
+				&fo->data->msg_iter);
+	if (unlikely(!copied)) {
 		kfree_skb(syn_data);
 		goto fallback;
 	}
+	if (copied != space) {
+		skb_trim(syn_data, copied);
+		space = copied;
+	}
 
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
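
Note: this converts the Fast Open SYN-data copy from the old iovec helper to the iov_iter API, and the error contract changes with it: memcpy_fromiovecend() was all-or-nothing with an explicit offset, while copy_from_iter() advances the iterator and returns the number of bytes actually copied. A short copy is therefore no longer fatal; the skb is trimmed and only `copied` bytes ride on the SYN. The two signatures, for reference:

	/* old helper, all-or-nothing: 0 on success, -EFAULT on fault */
	int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
				int offset, int len);

	/* new API: returns bytes copied (possibly short) and advances @i */
	size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);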
@@ -3244,6 +3268,14 @@ void tcp_send_ack(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
+	/* We do not want pure acks influencing TCP Small Queues or fq/pacing
+	 * too much.
+	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+	 * We also avoid tcp_wfree() overhead (cache line miss accessing
+	 * tp->tsq_flags) by using regular sock_wfree()
+	 */
+	skb_set_tcp_pure_ack(buff);
+
 	/* Send it off, this clears delayed acks for us. */
 	skb_mstamp_get(&buff->skb_mstamp);
 	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
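
Note: this is where the truesize trick from the earlier hunks pays off. TCP Small Queues compares sk->sk_wmem_alloc (the sum of queued skb->truesize) against a limit, and per the comment above a pure ACK would otherwise weigh in at roughly 784 bytes. Marked with truesize 2, ACKs become nearly invisible to that accounting and to fq's byte-based pacing. The TSQ gate earlier in this file looks roughly like this (abridged from the same era of tcp_write_xmit(), quoted from memory):

	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
		set_bit(TSQ_THROTTLED, &tp->tsq_flags);
		break;	/* back off until tcp_wfree() clears the flag */
	}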