aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author: Jason Baron <jbaron@akamai.com> 2017-01-17 13:37:19 -0500
committer: David S. Miller <davem@davemloft.net> 2017-01-17 15:51:55 -0500
commit: 0e40f4c9593ba2c7c30150ed669da97bd581c0cd (patch)
tree: 274f40b12fadf479f3579290c406cbf93c4857b6 /net/ipv4/tcp_input.c
parent: a870a97757dd4f165f4f7bb749350bee7df31716 (diff)
tcp: accept RST for rcv_nxt - 1 after receiving a FIN
Using a Mac OSX box as a client connecting to a Linux server, we have found that when certain applications (such as 'ab') are abruptly terminated (via ^C), a FIN is sent followed by a RST packet on tcp connections. The FIN is accepted by the Linux stack but the RST is sent with the same sequence number as the FIN, and Linux responds with a challenge ACK per RFC 5961. The OSX client then sometimes does not reply with any RST, as would be expected on a closed socket, because its replies to challenge ACKs are rate-limited.

This results in sockets accumulating on the Linux server left mostly in the CLOSE_WAIT state, although LAST_ACK and CLOSING are also possible. This sequence of events can tie up a lot of resources on the Linux server since there may be a lot of data in write buffers at the time of the RST. Accepting a RST equal to rcv_nxt - 1, after we have already successfully processed a FIN, has made a significant difference for us in practice, by freeing up unneeded resources in a more expedient fashion.

A packetdrill test demonstrating the behavior:

// testing mac osx rst behavior

// Establish a connection
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < S 0:0(0) win 32768 <mss 1460,nop,wscale 10>
0.100 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 5>
0.200 < . 1:1(0) ack 1 win 32768
0.200 accept(3, ..., ...) = 4

// Client closes the connection
0.300 < F. 1:1(0) ack 1 win 32768

// now send rst with same sequence
0.300 < R. 1:1(0) ack 1 win 32768

// make sure we are in TCP_CLOSE
0.400 %{
assert tcpi_state == 7
}%

Signed-off-by: Jason Baron <jbaron@akamai.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c28
1 files changed, 25 insertions, 3 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a34e9278c07..bfa165cc455a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5199,6 +5199,23 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5199 return err; 5199 return err;
5200} 5200}
5201 5201
5202/* Accept RST for rcv_nxt - 1 after a FIN.
5203 * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
5204 * FIN is sent followed by a RST packet. The RST is sent with the same
5205 * sequence number as the FIN, and thus according to RFC 5961 a challenge
5206 * ACK should be sent. However, Mac OSX rate limits replies to challenge
5207 * ACKs on the closed socket. In addition middleboxes can drop either the
5208 * challenge ACK or a subsequent RST.
5209 */
5210static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
5211{
5212 struct tcp_sock *tp = tcp_sk(sk);
5213
5214 return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
5215 (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
5216 TCPF_CLOSING));
5217}
5218
5202/* Does PAWS and seqno based validation of an incoming segment, flags will 5219/* Does PAWS and seqno based validation of an incoming segment, flags will
5203 * play significant role here. 5220 * play significant role here.
5204 */ 5221 */
@@ -5237,20 +5254,25 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5237 LINUX_MIB_TCPACKSKIPPEDSEQ, 5254 LINUX_MIB_TCPACKSKIPPEDSEQ,
5238 &tp->last_oow_ack_time)) 5255 &tp->last_oow_ack_time))
5239 tcp_send_dupack(sk, skb); 5256 tcp_send_dupack(sk, skb);
5257 } else if (tcp_reset_check(sk, skb)) {
5258 tcp_reset(sk);
5240 } 5259 }
5241 goto discard; 5260 goto discard;
5242 } 5261 }
5243 5262
5244 /* Step 2: check RST bit */ 5263 /* Step 2: check RST bit */
5245 if (th->rst) { 5264 if (th->rst) {
5246 /* RFC 5961 3.2 (extend to match against SACK too if available): 5265 /* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
5247 * If seq num matches RCV.NXT or the right-most SACK block, 5266 * FIN and SACK too if available):
5267 * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
5268 * the right-most SACK block,
5248 * then 5269 * then
5249 * RESET the connection 5270 * RESET the connection
5250 * else 5271 * else
5251 * Send a challenge ACK 5272 * Send a challenge ACK
5252 */ 5273 */
5253 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { 5274 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
5275 tcp_reset_check(sk, skb)) {
5254 rst_seq_match = true; 5276 rst_seq_match = true;
5255 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) { 5277 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5256 struct tcp_sack_block *sp = &tp->selective_acks[0]; 5278 struct tcp_sack_block *sp = &tp->selective_acks[0];