Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  |  56 ++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 65caf8b95e17..1db253e36045 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
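Note: the two sysctl globals deleted above are not dropped; they move behind struct net so that each network namespace can carry its own MTU-probing policy. A minimal sketch of the assumed per-namespace fields (the real definition would live in include/net/netns/ipv4.h; the field names are taken from their usage in tcp_mtup_init() below):

/* Sketch only -- assumed companion change in include/net/netns/ipv4.h. */
struct netns_ipv4 {
        /* ... other per-netns fields omitted ... */
        int sysctl_tcp_mtu_probing;     /* 0=off, 1=enable after ICMP blackhole, 2=always on */
        int sysctl_tcp_base_mss;        /* floor for the MTU probe search, default TCP_BASE_MSS */
};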
@@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
        skb_orphan(skb);
        skb->sk = sk;
-       skb->destructor = tcp_wfree;
+       skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
        skb_set_hash_from_sk(skb, sk);
        atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
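For this per-packet branch to stay cheap, skb_is_tcp_pure_ack() should be a header-only test. A sketch of the helper pair this hunk relies on, assuming the trick is to overload skb->truesize with a sentinel value (a pure ACK carries no payload, so its true size is uninteresting for accounting):

/* Sketch, assuming a truesize sentinel marks pure ACKs. */
static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
{
        skb->truesize = 2;      /* sentinel: charge almost nothing */
}

static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
{
        return skb->truesize == 2;
}

With that sentinel in place, the atomic_add(skb->truesize, ...) above charges a pure ACK only 2 bytes against sk_wmem_alloc, and sock_wfree() releases the same 2 bytes on the way out.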
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
 
-       icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+       icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
        icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
                               icsk->icsk_af_ops->net_header_len;
-       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
        icsk->icsk_mtup.probe_size = 0;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
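Once the knobs are per namespace, their defaults must be set when a namespace comes up rather than by a static initializer. A sketch of the assumed init-path counterpart (tcp_sk_init() in net/ipv4/tcp_ipv4.c is the per-netns TCP setup hook; the exact placement is an assumption, not shown in this diff):

static int __net_init tcp_sk_init(struct net *net)
{
        /* ... existing per-netns TCP setup ... */
        net->ipv4.sysctl_tcp_mtu_probing = 0;   /* probing off by default */
        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
        return 0;
}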
@@ -2775,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
        } else {
                /* Socket is locked, keep trying until memory is available. */
                for (;;) {
-                       skb = alloc_skb_fclone(MAX_TCP_HEADER,
-                                              sk->sk_allocation);
+                       skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
                        if (skb)
                                break;
                        yield();
                }
-
-               /* Reserve space for headers and prepare control bits. */
-               skb_reserve(skb, MAX_TCP_HEADER);
                /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
                tcp_init_nondata_skb(skb, tp->write_seq,
                                     TCPHDR_ACK | TCPHDR_FIN);
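sk_stream_alloc_skb() allocates from the fclone cache, reserves header room itself, and charges the socket's memory accounting, which is why the separate skb_reserve(MAX_TCP_HEADER) step could be deleted above. A condensed sketch of what it is assumed to do (for TCP, sk->sk_prot->max_header is MAX_TCP_HEADER; error handling and memory-pressure moderation omitted):

/* Condensed sketch of sk_stream_alloc_skb() from net/ipv4/tcp.c. */
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
{
        struct sk_buff *skb;

        skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
        if (skb && sk_wmem_schedule(sk, skb->truesize)) {
                skb_reserve(skb, sk->sk_prot->max_header);      /* header room */
                return skb;
        }
        kfree_skb(skb);         /* NULL-safe */
        return NULL;
}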
@@ -2939,6 +2933,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 }
 EXPORT_SYMBOL(tcp_make_synack);
 
+static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct tcp_congestion_ops *ca;
+       u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+
+       if (ca_key == TCP_CA_UNSPEC)
+               return;
+
+       rcu_read_lock();
+       ca = tcp_ca_find_key(ca_key);
+       if (likely(ca && try_module_get(ca->owner))) {
+               module_put(icsk->icsk_ca_ops->owner);
+               icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+               icsk->icsk_ca_ops = ca;
+       }
+       rcu_read_unlock();
+}
+
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
 {
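tcp_ca_dst_init() lets a route dictate the congestion control algorithm for connections it covers: RTAX_CC_ALGO carries a key derived from the algorithm's name (configured with something like ip route ... congctl [lock] <name> in a sufficiently new iproute2), tcp_ca_find_key() resolves that key back to a registered tcp_congestion_ops under RCU, and the module refcounts are swapped so the previous algorithm can unload. The "locked" state is assumed to ride on the route's metric-lock mask; a sketch of that helper:

/* Sketch, assuming the lock bit is the RTAX_CC_ALGO metric lock. */
static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
{
        return dst_metric_locked(dst, RTAX_CC_ALGO);
}

When icsk_ca_dst_locked is set, later code can use it to keep the route's choice from being silently replaced; the enforcement point is outside this diff.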
@@ -2964,6 +2977,8 @@ static void tcp_connect_init(struct sock *sk)
        tcp_mtup_init(sk);
        tcp_sync_mss(sk, dst_mtu(dst));
 
+       tcp_ca_dst_init(sk, dst);
+
        if (!tp->window_clamp)
                tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
        tp->advmss = dst_metric_advmss(dst);
@@ -3034,7 +3049,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_fastopen_request *fo = tp->fastopen_req;
-       int syn_loss = 0, space, err = 0;
+       int syn_loss = 0, space, err = 0, copied;
        unsigned long last_syn_loss = 0;
        struct sk_buff *syn_data;
 
@@ -3072,11 +3087,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
                goto fallback;
        syn_data->ip_summed = CHECKSUM_PARTIAL;
        memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-       if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-                                        fo->data->msg_iter.iov, 0, space))) {
+       copied = copy_from_iter(skb_put(syn_data, space), space,
+                               &fo->data->msg_iter);
+       if (unlikely(!copied)) {
                kfree_skb(syn_data);
                goto fallback;
        }
+       if (copied != space) {
+               skb_trim(syn_data, copied);
+               space = copied;
+       }
 
        /* No more data pending in inet_wait_for_connect() */
        if (space == fo->size)
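memcpy_fromiovecend() returned only 0 or -EFAULT; copy_from_iter() instead advances the iterator and returns the number of bytes it actually managed to copy, which can be short if a user page faults mid-copy. The new code therefore treats 0 as total failure and trims the skb on a short copy, so that the tail bytes reserved by skb_put() but never written are not transmitted. The contract assumed here, for reference:

/* From include/linux/uio.h: returns bytes copied (<= bytes), advancing i. */
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);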
@@ -3244,6 +3264,14 @@ void tcp_send_ack(struct sock *sk)
        skb_reserve(buff, MAX_TCP_HEADER);
        tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
+       /* We do not want pure acks influencing TCP Small Queues or fq/pacing
+        * too much.
+        * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+        * We also avoid tcp_wfree() overhead (cache line miss accessing
+        * tp->tsq_flags) by using regular sock_wfree()
+        */
+       skb_set_tcp_pure_ack(buff);
+
        /* Send it off, this clears delayed acks for us. */
        skb_mstamp_get(&buff->skb_mstamp);
        tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
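The payoff of the pure-ACK mark is in TCP Small Queues and fq pacing: both budget against sk_wmem_alloc, so an skb whose truesize is ~784 bytes (SKB_TRUESIZE of MAX_TCP_HEADER, as the comment above notes) could throttle or pace a flow on ACK traffic alone. A simplified sketch of the TSQ budget test in tcp_write_xmit() that pure ACKs no longer disturb (memory-barrier details dropped):

/* Simplified sketch of the TSQ budget test in tcp_write_xmit(). */
limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
        set_bit(TSQ_THROTTLED, &tp->tsq_flags);
        break;          /* stop sending until wmem drains */
}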