about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-11-18 02:06:20 -0500
committerDavid S. Miller <davem@davemloft.net>2014-11-19 14:57:01 -0500
commit355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a (patch)
treee91e6de26d24dd3105aa85b678d809e7ffd7749d
parentbaeababb5b85d5c4e6c917efe2a1504179438d3b (diff)
tcp: make connect() mem charging friendly
While working on sk_forward_alloc problems reported by Denys Fedoryshchenko, we found that tcp connect() (and fastopen) do not call sk_wmem_schedule() for SYN packet (and/or SYN/DATA packet), so sk_forward_alloc is negative while connect is in progress. We can fix this by calling regular sk_stream_alloc_skb() both for the SYN packet (in tcp_connect()) and the syn_data packet in tcp_send_syn_data(). Then, tcp_send_syn_data() can avoid copying syn_data as we simply can manipulate syn_data->cb[] to remove SYN flag (and increment seq). Instead of open coding memcpy_fromiovecend(), simply use this helper. This leaves in socket write queue clean fast clone skbs. This was tested against our fastopen packetdrill tests. Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/ipv4/tcp_output.c | 68
1 files changed, 28 insertions, 40 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index eb73a1dccf56..f5bd4bd3f7e6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3011,9 +3011,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3011{ 3011{
3012 struct tcp_sock *tp = tcp_sk(sk); 3012 struct tcp_sock *tp = tcp_sk(sk);
3013 struct tcp_fastopen_request *fo = tp->fastopen_req; 3013 struct tcp_fastopen_request *fo = tp->fastopen_req;
3014 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; 3014 int syn_loss = 0, space, err = 0;
3015 struct sk_buff *syn_data = NULL, *data;
3016 unsigned long last_syn_loss = 0; 3015 unsigned long last_syn_loss = 0;
3016 struct sk_buff *syn_data;
3017 3017
3018 tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ 3018 tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
3019 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, 3019 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
@@ -3044,48 +3044,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3044 /* limit to order-0 allocations */ 3044 /* limit to order-0 allocations */
3045 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); 3045 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3046 3046
3047 syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, 3047 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
3048 sk->sk_allocation); 3048 if (!syn_data)
3049 if (syn_data == NULL)
3050 goto fallback; 3049 goto fallback;
3050 syn_data->ip_summed = CHECKSUM_PARTIAL;
3051 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3052 if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
3053 fo->data->msg_iov, 0, space))) {
3054 kfree_skb(syn_data);
3055 goto fallback;
3056 }
3051 3057
3052 for (i = 0; i < iovlen && syn_data->len < space; ++i) { 3058 /* No more data pending in inet_wait_for_connect() */
3053 struct iovec *iov = &fo->data->msg_iov[i]; 3059 if (space == fo->size)
3054 unsigned char __user *from = iov->iov_base; 3060 fo->data = NULL;
3055 int len = iov->iov_len; 3061 fo->copied = space;
3056 3062
3057 if (syn_data->len + len > space) 3063 tcp_connect_queue_skb(sk, syn_data);
3058 len = space - syn_data->len;
3059 else if (i + 1 == iovlen)
3060 /* No more data pending in inet_wait_for_connect() */
3061 fo->data = NULL;
3062 3064
3063 if (skb_add_data(syn_data, from, len)) 3065 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3064 goto fallback;
3065 }
3066 3066
3067 /* Queue a data-only packet after the regular SYN for retransmission */ 3067 syn->skb_mstamp = syn_data->skb_mstamp;
3068 data = pskb_copy(syn_data, sk->sk_allocation);
3069 if (data == NULL)
3070 goto fallback;
3071 TCP_SKB_CB(data)->seq++;
3072 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
3073 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
3074 tcp_connect_queue_skb(sk, data);
3075 fo->copied = data->len;
3076
3077 /* syn_data is about to be sent, we need to take current time stamps
3078 * for the packets that are in write queue : SYN packet and DATA
3079 */
3080 skb_mstamp_get(&syn->skb_mstamp);
3081 data->skb_mstamp = syn->skb_mstamp;
3082 3068
3083 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { 3069 /* Now full SYN+DATA was cloned and sent (or not),
3070 * remove the SYN from the original skb (syn_data)
3071 * we keep in write queue in case of a retransmit, as we
3072 * also have the SYN packet (with no data) in the same queue.
3073 */
3074 TCP_SKB_CB(syn_data)->seq++;
3075 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3076 if (!err) {
3084 tp->syn_data = (fo->copied > 0); 3077 tp->syn_data = (fo->copied > 0);
3085 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); 3078 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3086 goto done; 3079 goto done;
3087 } 3080 }
3088 syn_data = NULL;
3089 3081
3090fallback: 3082fallback:
3091 /* Send a regular SYN with Fast Open cookie request option */ 3083 /* Send a regular SYN with Fast Open cookie request option */
@@ -3094,7 +3086,6 @@ fallback:
3094 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); 3086 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3095 if (err) 3087 if (err)
3096 tp->syn_fastopen = 0; 3088 tp->syn_fastopen = 0;
3097 kfree_skb(syn_data);
3098done: 3089done:
3099 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ 3090 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
3100 return err; 3091 return err;
@@ -3114,13 +3105,10 @@ int tcp_connect(struct sock *sk)
3114 return 0; 3105 return 0;
3115 } 3106 }
3116 3107
3117 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); 3108 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
3118 if (unlikely(buff == NULL)) 3109 if (unlikely(!buff))
3119 return -ENOBUFS; 3110 return -ENOBUFS;
3120 3111
3121 /* Reserve space for headers. */
3122 skb_reserve(buff, MAX_TCP_HEADER);
3123
3124 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 3112 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3125 tp->retrans_stamp = tcp_time_stamp; 3113 tp->retrans_stamp = tcp_time_stamp;
3126 tcp_connect_queue_skb(sk, buff); 3114 tcp_connect_queue_skb(sk, buff);