summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2019-10-10 23:17:41 -0400
committer David S. Miller <davem@davemloft.net> 2019-10-13 13:13:08 -0400
commit 0f31746452e6793ad6271337438af8f4defb8940 (patch)
tree b30e39e3839addc642ceb8c87ce5e20be2c79fae
parent 7db48e983930285b765743ebd665aecf9850582b (diff)
tcp: annotate tp->write_seq lockless reads
There are a few places where we fetch tp->write_seq while this field can change from IRQ or another CPU. We need to add READ_ONCE() annotations, and also make sure the write sides use the corresponding WRITE_ONCE() to avoid store-tearing.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/tcp.h 2
-rw-r--r-- net/ipv4/tcp.c 20
-rw-r--r-- net/ipv4/tcp_diag.c 2
-rw-r--r-- net/ipv4/tcp_ipv4.c 21
-rw-r--r-- net/ipv4/tcp_minisocks.c 2
-rw-r--r-- net/ipv4/tcp_output.c 4
-rw-r--r-- net/ipv6/tcp_ipv6.c 13
7 files changed, 36 insertions, 28 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 35f6f7e0fdc2..8e7c3f6801a9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1917,7 +1917,7 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
1917static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) 1917static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
1918{ 1918{
1919 const struct tcp_sock *tp = tcp_sk(sk); 1919 const struct tcp_sock *tp = tcp_sk(sk);
1920 u32 notsent_bytes = tp->write_seq - tp->snd_nxt; 1920 u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt;
1921 1921
1922 return (notsent_bytes << wake) < tcp_notsent_lowat(tp); 1922 return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
1923} 1923}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c322ad071e17..96dd65cbeb85 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -616,7 +616,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
616 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 616 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
617 answ = 0; 617 answ = 0;
618 else 618 else
619 answ = tp->write_seq - tp->snd_una; 619 answ = READ_ONCE(tp->write_seq) - tp->snd_una;
620 break; 620 break;
621 case SIOCOUTQNSD: 621 case SIOCOUTQNSD:
622 if (sk->sk_state == TCP_LISTEN) 622 if (sk->sk_state == TCP_LISTEN)
@@ -625,7 +625,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
625 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 625 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
626 answ = 0; 626 answ = 0;
627 else 627 else
628 answ = tp->write_seq - tp->snd_nxt; 628 answ = READ_ONCE(tp->write_seq) - tp->snd_nxt;
629 break; 629 break;
630 default: 630 default:
631 return -ENOIOCTLCMD; 631 return -ENOIOCTLCMD;
@@ -1035,7 +1035,7 @@ new_segment:
1035 sk->sk_wmem_queued += copy; 1035 sk->sk_wmem_queued += copy;
1036 sk_mem_charge(sk, copy); 1036 sk_mem_charge(sk, copy);
1037 skb->ip_summed = CHECKSUM_PARTIAL; 1037 skb->ip_summed = CHECKSUM_PARTIAL;
1038 tp->write_seq += copy; 1038 WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
1039 TCP_SKB_CB(skb)->end_seq += copy; 1039 TCP_SKB_CB(skb)->end_seq += copy;
1040 tcp_skb_pcount_set(skb, 0); 1040 tcp_skb_pcount_set(skb, 0);
1041 1041
@@ -1362,7 +1362,7 @@ new_segment:
1362 if (!copied) 1362 if (!copied)
1363 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; 1363 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1364 1364
1365 tp->write_seq += copy; 1365 WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
1366 TCP_SKB_CB(skb)->end_seq += copy; 1366 TCP_SKB_CB(skb)->end_seq += copy;
1367 tcp_skb_pcount_set(skb, 0); 1367 tcp_skb_pcount_set(skb, 0);
1368 1368
@@ -2562,6 +2562,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2562 struct inet_connection_sock *icsk = inet_csk(sk); 2562 struct inet_connection_sock *icsk = inet_csk(sk);
2563 struct tcp_sock *tp = tcp_sk(sk); 2563 struct tcp_sock *tp = tcp_sk(sk);
2564 int old_state = sk->sk_state; 2564 int old_state = sk->sk_state;
2565 u32 seq;
2565 2566
2566 if (old_state != TCP_CLOSE) 2567 if (old_state != TCP_CLOSE)
2567 tcp_set_state(sk, TCP_CLOSE); 2568 tcp_set_state(sk, TCP_CLOSE);
@@ -2604,9 +2605,12 @@ int tcp_disconnect(struct sock *sk, int flags)
2604 tp->srtt_us = 0; 2605 tp->srtt_us = 0;
2605 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); 2606 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
2606 tp->rcv_rtt_last_tsecr = 0; 2607 tp->rcv_rtt_last_tsecr = 0;
2607 tp->write_seq += tp->max_window + 2; 2608
2608 if (tp->write_seq == 0) 2609 seq = tp->write_seq + tp->max_window + 2;
2609 tp->write_seq = 1; 2610 if (!seq)
2611 seq = 1;
2612 WRITE_ONCE(tp->write_seq, seq);
2613
2610 icsk->icsk_backoff = 0; 2614 icsk->icsk_backoff = 0;
2611 tp->snd_cwnd = 2; 2615 tp->snd_cwnd = 2;
2612 icsk->icsk_probes_out = 0; 2616 icsk->icsk_probes_out = 0;
@@ -2933,7 +2937,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2933 if (sk->sk_state != TCP_CLOSE) 2937 if (sk->sk_state != TCP_CLOSE)
2934 err = -EPERM; 2938 err = -EPERM;
2935 else if (tp->repair_queue == TCP_SEND_QUEUE) 2939 else if (tp->repair_queue == TCP_SEND_QUEUE)
2936 tp->write_seq = val; 2940 WRITE_ONCE(tp->write_seq, val);
2937 else if (tp->repair_queue == TCP_RECV_QUEUE) 2941 else if (tp->repair_queue == TCP_RECV_QUEUE)
2938 WRITE_ONCE(tp->rcv_nxt, val); 2942 WRITE_ONCE(tp->rcv_nxt, val);
2939 else 2943 else
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 66273c8a55c2..549506162dde 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -28,7 +28,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
28 28
29 r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - 29 r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
30 READ_ONCE(tp->copied_seq), 0); 30 READ_ONCE(tp->copied_seq), 0);
31 r->idiag_wqueue = tp->write_seq - tp->snd_una; 31 r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
32 } 32 }
33 if (info) 33 if (info)
34 tcp_get_info(sk, info); 34 tcp_get_info(sk, info);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 39560f482e0b..6be568334848 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
164 * without appearing to create any others. 164 * without appearing to create any others.
165 */ 165 */
166 if (likely(!tp->repair)) { 166 if (likely(!tp->repair)) {
167 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 167 u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
168 if (tp->write_seq == 0) 168
169 tp->write_seq = 1; 169 if (!seq)
170 seq = 1;
171 WRITE_ONCE(tp->write_seq, seq);
170 tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 172 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
171 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 173 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
172 } 174 }
@@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
253 tp->rx_opt.ts_recent = 0; 255 tp->rx_opt.ts_recent = 0;
254 tp->rx_opt.ts_recent_stamp = 0; 256 tp->rx_opt.ts_recent_stamp = 0;
255 if (likely(!tp->repair)) 257 if (likely(!tp->repair))
256 tp->write_seq = 0; 258 WRITE_ONCE(tp->write_seq, 0);
257 } 259 }
258 260
259 inet->inet_dport = usin->sin_port; 261 inet->inet_dport = usin->sin_port;
@@ -291,10 +293,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
291 293
292 if (likely(!tp->repair)) { 294 if (likely(!tp->repair)) {
293 if (!tp->write_seq) 295 if (!tp->write_seq)
294 tp->write_seq = secure_tcp_seq(inet->inet_saddr, 296 WRITE_ONCE(tp->write_seq,
295 inet->inet_daddr, 297 secure_tcp_seq(inet->inet_saddr,
296 inet->inet_sport, 298 inet->inet_daddr,
297 usin->sin_port); 299 inet->inet_sport,
300 usin->sin_port));
298 tp->tsoffset = secure_tcp_ts_off(sock_net(sk), 301 tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
299 inet->inet_saddr, 302 inet->inet_saddr,
300 inet->inet_daddr); 303 inet->inet_daddr);
@@ -2461,7 +2464,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2461 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2464 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2462 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2465 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2463 i, src, srcp, dest, destp, state, 2466 i, src, srcp, dest, destp, state,
2464 tp->write_seq - tp->snd_una, 2467 READ_ONCE(tp->write_seq) - tp->snd_una,
2465 rx_queue, 2468 rx_queue,
2466 timer_active, 2469 timer_active,
2467 jiffies_delta_to_clock_t(timer_expires - jiffies), 2470 jiffies_delta_to_clock_t(timer_expires - jiffies),
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index c4731d26ab4a..339944690329 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -498,7 +498,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
498 newtp->total_retrans = req->num_retrans; 498 newtp->total_retrans = req->num_retrans;
499 499
500 tcp_init_xmit_timers(newsk); 500 tcp_init_xmit_timers(newsk);
501 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; 501 WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
502 502
503 if (sock_flag(newsk, SOCK_KEEPOPEN)) 503 if (sock_flag(newsk, SOCK_KEEPOPEN))
504 inet_csk_reset_keepalive_timer(newsk, 504 inet_csk_reset_keepalive_timer(newsk,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7dda12720169..c17c2a78809d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1196,7 +1196,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1196 struct tcp_sock *tp = tcp_sk(sk); 1196 struct tcp_sock *tp = tcp_sk(sk);
1197 1197
1198 /* Advance write_seq and place onto the write_queue. */ 1198 /* Advance write_seq and place onto the write_queue. */
1199 tp->write_seq = TCP_SKB_CB(skb)->end_seq; 1199 WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
1200 __skb_header_release(skb); 1200 __skb_header_release(skb);
1201 tcp_add_write_queue_tail(sk, skb); 1201 tcp_add_write_queue_tail(sk, skb);
1202 sk->sk_wmem_queued += skb->truesize; 1202 sk->sk_wmem_queued += skb->truesize;
@@ -3449,7 +3449,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3449 __skb_header_release(skb); 3449 __skb_header_release(skb);
3450 sk->sk_wmem_queued += skb->truesize; 3450 sk->sk_wmem_queued += skb->truesize;
3451 sk_mem_charge(sk, skb->truesize); 3451 sk_mem_charge(sk, skb->truesize);
3452 tp->write_seq = tcb->end_seq; 3452 WRITE_ONCE(tp->write_seq, tcb->end_seq);
3453 tp->packets_out += tcp_skb_pcount(skb); 3453 tp->packets_out += tcp_skb_pcount(skb);
3454} 3454}
3455 3455
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a62c7042fc4a..4804b6dc5e65 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 tp->rx_opt.ts_recent = 0; 216 tp->rx_opt.ts_recent = 0;
217 tp->rx_opt.ts_recent_stamp = 0; 217 tp->rx_opt.ts_recent_stamp = 0;
218 tp->write_seq = 0; 218 WRITE_ONCE(tp->write_seq, 0);
219 } 219 }
220 220
221 sk->sk_v6_daddr = usin->sin6_addr; 221 sk->sk_v6_daddr = usin->sin6_addr;
@@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
311 311
312 if (likely(!tp->repair)) { 312 if (likely(!tp->repair)) {
313 if (!tp->write_seq) 313 if (!tp->write_seq)
314 tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, 314 WRITE_ONCE(tp->write_seq,
315 sk->sk_v6_daddr.s6_addr32, 315 secure_tcpv6_seq(np->saddr.s6_addr32,
316 inet->inet_sport, 316 sk->sk_v6_daddr.s6_addr32,
317 inet->inet_dport); 317 inet->inet_sport,
318 inet->inet_dport));
318 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), 319 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
319 np->saddr.s6_addr32, 320 np->saddr.s6_addr32,
320 sk->sk_v6_daddr.s6_addr32); 321 sk->sk_v6_daddr.s6_addr32);
@@ -1907,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1907 dest->s6_addr32[0], dest->s6_addr32[1], 1908 dest->s6_addr32[0], dest->s6_addr32[1],
1908 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1909 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1909 state, 1910 state,
1910 tp->write_seq - tp->snd_una, 1911 READ_ONCE(tp->write_seq) - tp->snd_una,
1911 rx_queue, 1912 rx_queue,
1912 timer_active, 1913 timer_active,
1913 jiffies_delta_to_clock_t(timer_expires - jiffies), 1914 jiffies_delta_to_clock_t(timer_expires - jiffies),