diff options
author | Eric Dumazet <edumazet@google.com> | 2014-11-18 02:06:20 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-11-19 14:57:01 -0500 |
commit | 355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a (patch) | |
tree | e91e6de26d24dd3105aa85b678d809e7ffd7749d | |
parent | baeababb5b85d5c4e6c917efe2a1504179438d3b (diff) |
tcp: make connect() mem charging friendly
While working on sk_forward_alloc problems reported by Denys
Fedoryshchenko, we found that tcp connect() (and fastopen) do not call
sk_wmem_schedule() for SYN packet (and/or SYN/DATA packet), so
sk_forward_alloc is negative while connect is in progress.
We can fix this by calling the regular sk_stream_alloc_skb() both for the
SYN packet (in tcp_connect()) and the syn_data packet in
tcp_send_syn_data().
Then, tcp_send_syn_data() can avoid copying syn_data, as we can simply
manipulate syn_data->cb[] to remove the SYN flag (and increment the seq).
Instead of open-coding memcpy_fromiovecend(), simply use this helper.
This leaves clean fast-clone skbs in the socket write queue.
This was tested against our fastopen packetdrill tests.
Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_output.c | 68 |
1 files changed, 28 insertions, 40 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index eb73a1dccf56..f5bd4bd3f7e6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -3011,9 +3011,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | |||
3011 | { | 3011 | { |
3012 | struct tcp_sock *tp = tcp_sk(sk); | 3012 | struct tcp_sock *tp = tcp_sk(sk); |
3013 | struct tcp_fastopen_request *fo = tp->fastopen_req; | 3013 | struct tcp_fastopen_request *fo = tp->fastopen_req; |
3014 | int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; | 3014 | int syn_loss = 0, space, err = 0; |
3015 | struct sk_buff *syn_data = NULL, *data; | ||
3016 | unsigned long last_syn_loss = 0; | 3015 | unsigned long last_syn_loss = 0; |
3016 | struct sk_buff *syn_data; | ||
3017 | 3017 | ||
3018 | tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ | 3018 | tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ |
3019 | tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, | 3019 | tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, |
@@ -3044,48 +3044,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | |||
3044 | /* limit to order-0 allocations */ | 3044 | /* limit to order-0 allocations */ |
3045 | space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); | 3045 | space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); |
3046 | 3046 | ||
3047 | syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, | 3047 | syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation); |
3048 | sk->sk_allocation); | 3048 | if (!syn_data) |
3049 | if (syn_data == NULL) | ||
3050 | goto fallback; | 3049 | goto fallback; |
3050 | syn_data->ip_summed = CHECKSUM_PARTIAL; | ||
3051 | memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); | ||
3052 | if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), | ||
3053 | fo->data->msg_iov, 0, space))) { | ||
3054 | kfree_skb(syn_data); | ||
3055 | goto fallback; | ||
3056 | } | ||
3051 | 3057 | ||
3052 | for (i = 0; i < iovlen && syn_data->len < space; ++i) { | 3058 | /* No more data pending in inet_wait_for_connect() */ |
3053 | struct iovec *iov = &fo->data->msg_iov[i]; | 3059 | if (space == fo->size) |
3054 | unsigned char __user *from = iov->iov_base; | 3060 | fo->data = NULL; |
3055 | int len = iov->iov_len; | 3061 | fo->copied = space; |
3056 | 3062 | ||
3057 | if (syn_data->len + len > space) | 3063 | tcp_connect_queue_skb(sk, syn_data); |
3058 | len = space - syn_data->len; | ||
3059 | else if (i + 1 == iovlen) | ||
3060 | /* No more data pending in inet_wait_for_connect() */ | ||
3061 | fo->data = NULL; | ||
3062 | 3064 | ||
3063 | if (skb_add_data(syn_data, from, len)) | 3065 | err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); |
3064 | goto fallback; | ||
3065 | } | ||
3066 | 3066 | ||
3067 | /* Queue a data-only packet after the regular SYN for retransmission */ | 3067 | syn->skb_mstamp = syn_data->skb_mstamp; |
3068 | data = pskb_copy(syn_data, sk->sk_allocation); | ||
3069 | if (data == NULL) | ||
3070 | goto fallback; | ||
3071 | TCP_SKB_CB(data)->seq++; | ||
3072 | TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; | ||
3073 | TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); | ||
3074 | tcp_connect_queue_skb(sk, data); | ||
3075 | fo->copied = data->len; | ||
3076 | |||
3077 | /* syn_data is about to be sent, we need to take current time stamps | ||
3078 | * for the packets that are in write queue : SYN packet and DATA | ||
3079 | */ | ||
3080 | skb_mstamp_get(&syn->skb_mstamp); | ||
3081 | data->skb_mstamp = syn->skb_mstamp; | ||
3082 | 3068 | ||
3083 | if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { | 3069 | /* Now full SYN+DATA was cloned and sent (or not), |
3070 | * remove the SYN from the original skb (syn_data) | ||
3071 | * we keep in write queue in case of a retransmit, as we | ||
3072 | * also have the SYN packet (with no data) in the same queue. | ||
3073 | */ | ||
3074 | TCP_SKB_CB(syn_data)->seq++; | ||
3075 | TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; | ||
3076 | if (!err) { | ||
3084 | tp->syn_data = (fo->copied > 0); | 3077 | tp->syn_data = (fo->copied > 0); |
3085 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); | 3078 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); |
3086 | goto done; | 3079 | goto done; |
3087 | } | 3080 | } |
3088 | syn_data = NULL; | ||
3089 | 3081 | ||
3090 | fallback: | 3082 | fallback: |
3091 | /* Send a regular SYN with Fast Open cookie request option */ | 3083 | /* Send a regular SYN with Fast Open cookie request option */ |
@@ -3094,7 +3086,6 @@ fallback: | |||
3094 | err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); | 3086 | err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); |
3095 | if (err) | 3087 | if (err) |
3096 | tp->syn_fastopen = 0; | 3088 | tp->syn_fastopen = 0; |
3097 | kfree_skb(syn_data); | ||
3098 | done: | 3089 | done: |
3099 | fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ | 3090 | fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ |
3100 | return err; | 3091 | return err; |
@@ -3114,13 +3105,10 @@ int tcp_connect(struct sock *sk) | |||
3114 | return 0; | 3105 | return 0; |
3115 | } | 3106 | } |
3116 | 3107 | ||
3117 | buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); | 3108 | buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); |
3118 | if (unlikely(buff == NULL)) | 3109 | if (unlikely(!buff)) |
3119 | return -ENOBUFS; | 3110 | return -ENOBUFS; |
3120 | 3111 | ||
3121 | /* Reserve space for headers. */ | ||
3122 | skb_reserve(buff, MAX_TCP_HEADER); | ||
3123 | |||
3124 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); | 3112 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
3125 | tp->retrans_stamp = tcp_time_stamp; | 3113 | tp->retrans_stamp = tcp_time_stamp; |
3126 | tcp_connect_queue_skb(sk, buff); | 3114 | tcp_connect_queue_skb(sk, buff); |