path: root/net/ipv4/tcp_output.c
Diffstat (limited to 'net/ipv4/tcp_output.c')
 net/ipv4/tcp_output.c | 60
 1 file changed, 44 insertions(+), 16 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f18262e2326..1db253e36045 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
@@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = tcp_wfree;
+	skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
 	skb_set_hash_from_sk(skb, sk);
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
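
For reference, the skb_is_tcp_pure_ack() test introduced above pairs with the
skb_set_tcp_pure_ack() call added to tcp_send_ack() at the end of this diff.
A minimal sketch of how such a helper pair can be implemented, assuming the
mainline trick (include/net/tcp.h) of flagging pure ACKs with an otherwise
impossible truesize value:

    /* Sketch, not part of this diff: a real skb's truesize always includes
     * sizeof(struct sk_buff), so the value 2 is free to act as a marker.
     */
    static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
    {
            skb->truesize = 2;
    }

    static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
    {
            return skb->truesize == 2;
    }

With sock_wfree() as the destructor, a pure ACK skips the tcp_wfree() TSQ
bookkeeping entirely, which is the cache-line saving the final hunk's comment
refers to.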
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct net *net = sock_net(sk);
 
-	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+	icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
 	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
 			       icsk->icsk_af_ops->net_header_len;
-	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
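
The two globals removed at the top of this file become per-namespace fields,
so their defaults have to be established when a netns is created. A sketch of
the matching setup, assuming the usual tcp_sk_init() pernet hook in
net/ipv4/tcp_ipv4.c:

    static int __net_init tcp_sk_init(struct net *net)
    {
            /* ... existing per-namespace setup ... */

            /* Defaults previously carried by the removed globals. */
            net->ipv4.sysctl_tcp_mtu_probing = 0;
            net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
            return 0;
    }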
@@ -2019,7 +2017,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
 
-		if (tso_segs == 1) {
+		if (tso_segs == 1 || !max_segs) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
 						     (tcp_skb_is_last(sk, skb) ?
 						      nonagle : TCP_NAGLE_PUSH))))
@@ -2032,7 +2030,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
+		if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
 						    min_t(unsigned int,
 							  cwnd_quota,
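
Both max_segs checks above guard the case where TSO autosizing returns no
segment budget, steering the write loop down the single-MSS path instead of
letting tcp_mss_split_point() compute a zero-sized split. A plausible shape
for the sizing helper, assuming the mainline tcp_tso_autosize() that derives
the budget from the pacing rate (the details here are an assumption, not part
of this diff):

    static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
    {
            u32 bytes, segs;

            /* Aim for roughly one packet per ms at the current pacing rate,
             * bounded by what fits in a single GSO packet.
             */
            bytes = min(sk->sk_pacing_rate >> 10,
                        sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);

            segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);

            return min_t(u32, segs, sk->sk_gso_max_segs);
    }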
@@ -2775,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
 	} else {
 		/* Socket is locked, keep trying until memory is available. */
 		for (;;) {
-			skb = alloc_skb_fclone(MAX_TCP_HEADER,
-					       sk->sk_allocation);
+			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
 			if (skb)
 				break;
 			yield();
 		}
-
-		/* Reserve space for headers and prepare control bits. */
-		skb_reserve(skb, MAX_TCP_HEADER);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
 				     TCPHDR_ACK | TCPHDR_FIN);
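
The open-coded alloc_skb_fclone() plus skb_reserve() pair becomes redundant
here because sk_stream_alloc_skb() reserves the header room itself and also
charges the allocation against the socket's send-buffer accounting. Roughly,
and as a sketch only (the real helper in net/ipv4/tcp.c does a little more):

    struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
    {
            struct sk_buff *skb;

            skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
            if (skb) {
                    if (sk_wmem_schedule(sk, skb->truesize)) {
                            /* Header room the caller no longer reserves. */
                            skb_reserve(skb, sk->sk_prot->max_header);
                            return skb;
                    }
                    __kfree_skb(skb);
            }
            return NULL;    /* the caller loops with yield() until this succeeds */
    }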
@@ -2939,6 +2933,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 }
 EXPORT_SYMBOL(tcp_make_synack);
 
+static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_congestion_ops *ca;
+	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+
+	if (ca_key == TCP_CA_UNSPEC)
+		return;
+
+	rcu_read_lock();
+	ca = tcp_ca_find_key(ca_key);
+	if (likely(ca && try_module_get(ca->owner))) {
+		module_put(icsk->icsk_ca_ops->owner);
+		icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+		icsk->icsk_ca_ops = ca;
+	}
+	rcu_read_unlock();
+}
+
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
 {
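
tcp_ca_dst_init() lets a route dictate the congestion control module through
the RTAX_CC_ALGO metric: tcp_ca_find_key() resolves the stored key back to a
registered tcp_congestion_ops under RCU, and the module refcounts are swapped
before the ops pointer is replaced. The locked-flag helper it calls is
presumably the thin wrapper below (a sketch over dst_metric_locked()):

    static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
    {
            /* A route may lock its congestion-control choice so that a
             * later setsockopt(TCP_CONGESTION) cannot override it.
             */
            return dst_metric_locked(dst, RTAX_CC_ALGO);
    }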
@@ -2964,6 +2977,8 @@ static void tcp_connect_init(struct sock *sk)
 	tcp_mtup_init(sk);
 	tcp_sync_mss(sk, dst_mtu(dst));
 
+	tcp_ca_dst_init(sk, dst);
+
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric_advmss(dst);
@@ -3034,7 +3049,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0;
+	int syn_loss = 0, space, err = 0, copied;
 	unsigned long last_syn_loss = 0;
 	struct sk_buff *syn_data;
 
@@ -3072,11 +3087,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-					 fo->data->msg_iter.iov, 0, space))) {
+	copied = copy_from_iter(skb_put(syn_data, space), space,
+				&fo->data->msg_iter);
+	if (unlikely(!copied)) {
 		kfree_skb(syn_data);
 		goto fallback;
 	}
+	if (copied != space) {
+		skb_trim(syn_data, copied);
+		space = copied;
+	}
 
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
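
The conversion changes the error model: memcpy_fromiovecend() returned 0 on
success or -EFAULT, while copy_from_iter() returns the number of bytes it
actually copied and may legitimately come up short. The hunk above therefore
treats zero as failure and trims the skb after a partial copy. The same
pattern as a self-contained helper (tcp_fill_from_iter is a hypothetical name
for illustration, not in the tree):

    static int tcp_fill_from_iter(struct sk_buff *skb, int space,
                                  struct iov_iter *from)
    {
            int copied = copy_from_iter(skb_put(skb, space), space, from);

            if (!copied)
                    return -EFAULT;         /* nothing copied: caller falls back */
            if (copied != space)
                    skb_trim(skb, copied);  /* keep only the bytes that arrived */
            return copied;
    }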
@@ -3244,6 +3264,14 @@ void tcp_send_ack(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
+	/* We do not want pure acks influencing TCP Small Queues or fq/pacing
+	 * too much.
+	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+	 * We also avoid tcp_wfree() overhead (cache line miss accessing
+	 * tp->tsq_flags) by using regular sock_wfree()
+	 */
+	skb_set_tcp_pure_ack(buff);
+
 	/* Send it off, this clears delayed acks for us. */
 	skb_mstamp_get(&buff->skb_mstamp);
 	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));