diff options
author | Eric Dumazet <edumazet@google.com> | 2019-10-10 23:17:41 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-10-13 13:13:08 -0400 |
commit | 0f31746452e6793ad6271337438af8f4defb8940 (patch) | |
tree | b30e39e3839addc642ceb8c87ce5e20be2c79fae | |
parent | 7db48e983930285b765743ebd665aecf9850582b (diff) |
tcp: annotate tp->write_seq lockless reads
There are a few places where we fetch tp->write_seq while
this field can change from IRQ context or another CPU.
We need to add READ_ONCE() annotations, and also make
sure the write sides use the corresponding WRITE_ONCE() to avoid
store-tearing.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/tcp.h | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 20 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 21 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 4 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 13 |
7 files changed, 36 insertions, 28 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index 35f6f7e0fdc2..8e7c3f6801a9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -1917,7 +1917,7 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) | |||
1917 | static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) | 1917 | static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) |
1918 | { | 1918 | { |
1919 | const struct tcp_sock *tp = tcp_sk(sk); | 1919 | const struct tcp_sock *tp = tcp_sk(sk); |
1920 | u32 notsent_bytes = tp->write_seq - tp->snd_nxt; | 1920 | u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt; |
1921 | 1921 | ||
1922 | return (notsent_bytes << wake) < tcp_notsent_lowat(tp); | 1922 | return (notsent_bytes << wake) < tcp_notsent_lowat(tp); |
1923 | } | 1923 | } |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c322ad071e17..96dd65cbeb85 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -616,7 +616,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
616 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) | 616 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) |
617 | answ = 0; | 617 | answ = 0; |
618 | else | 618 | else |
619 | answ = tp->write_seq - tp->snd_una; | 619 | answ = READ_ONCE(tp->write_seq) - tp->snd_una; |
620 | break; | 620 | break; |
621 | case SIOCOUTQNSD: | 621 | case SIOCOUTQNSD: |
622 | if (sk->sk_state == TCP_LISTEN) | 622 | if (sk->sk_state == TCP_LISTEN) |
@@ -625,7 +625,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
625 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) | 625 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) |
626 | answ = 0; | 626 | answ = 0; |
627 | else | 627 | else |
628 | answ = tp->write_seq - tp->snd_nxt; | 628 | answ = READ_ONCE(tp->write_seq) - tp->snd_nxt; |
629 | break; | 629 | break; |
630 | default: | 630 | default: |
631 | return -ENOIOCTLCMD; | 631 | return -ENOIOCTLCMD; |
@@ -1035,7 +1035,7 @@ new_segment: | |||
1035 | sk->sk_wmem_queued += copy; | 1035 | sk->sk_wmem_queued += copy; |
1036 | sk_mem_charge(sk, copy); | 1036 | sk_mem_charge(sk, copy); |
1037 | skb->ip_summed = CHECKSUM_PARTIAL; | 1037 | skb->ip_summed = CHECKSUM_PARTIAL; |
1038 | tp->write_seq += copy; | 1038 | WRITE_ONCE(tp->write_seq, tp->write_seq + copy); |
1039 | TCP_SKB_CB(skb)->end_seq += copy; | 1039 | TCP_SKB_CB(skb)->end_seq += copy; |
1040 | tcp_skb_pcount_set(skb, 0); | 1040 | tcp_skb_pcount_set(skb, 0); |
1041 | 1041 | ||
@@ -1362,7 +1362,7 @@ new_segment: | |||
1362 | if (!copied) | 1362 | if (!copied) |
1363 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; | 1363 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; |
1364 | 1364 | ||
1365 | tp->write_seq += copy; | 1365 | WRITE_ONCE(tp->write_seq, tp->write_seq + copy); |
1366 | TCP_SKB_CB(skb)->end_seq += copy; | 1366 | TCP_SKB_CB(skb)->end_seq += copy; |
1367 | tcp_skb_pcount_set(skb, 0); | 1367 | tcp_skb_pcount_set(skb, 0); |
1368 | 1368 | ||
@@ -2562,6 +2562,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2562 | struct inet_connection_sock *icsk = inet_csk(sk); | 2562 | struct inet_connection_sock *icsk = inet_csk(sk); |
2563 | struct tcp_sock *tp = tcp_sk(sk); | 2563 | struct tcp_sock *tp = tcp_sk(sk); |
2564 | int old_state = sk->sk_state; | 2564 | int old_state = sk->sk_state; |
2565 | u32 seq; | ||
2565 | 2566 | ||
2566 | if (old_state != TCP_CLOSE) | 2567 | if (old_state != TCP_CLOSE) |
2567 | tcp_set_state(sk, TCP_CLOSE); | 2568 | tcp_set_state(sk, TCP_CLOSE); |
@@ -2604,9 +2605,12 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2604 | tp->srtt_us = 0; | 2605 | tp->srtt_us = 0; |
2605 | tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); | 2606 | tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); |
2606 | tp->rcv_rtt_last_tsecr = 0; | 2607 | tp->rcv_rtt_last_tsecr = 0; |
2607 | tp->write_seq += tp->max_window + 2; | 2608 | |
2608 | if (tp->write_seq == 0) | 2609 | seq = tp->write_seq + tp->max_window + 2; |
2609 | tp->write_seq = 1; | 2610 | if (!seq) |
2611 | seq = 1; | ||
2612 | WRITE_ONCE(tp->write_seq, seq); | ||
2613 | |||
2610 | icsk->icsk_backoff = 0; | 2614 | icsk->icsk_backoff = 0; |
2611 | tp->snd_cwnd = 2; | 2615 | tp->snd_cwnd = 2; |
2612 | icsk->icsk_probes_out = 0; | 2616 | icsk->icsk_probes_out = 0; |
@@ -2933,7 +2937,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2933 | if (sk->sk_state != TCP_CLOSE) | 2937 | if (sk->sk_state != TCP_CLOSE) |
2934 | err = -EPERM; | 2938 | err = -EPERM; |
2935 | else if (tp->repair_queue == TCP_SEND_QUEUE) | 2939 | else if (tp->repair_queue == TCP_SEND_QUEUE) |
2936 | tp->write_seq = val; | 2940 | WRITE_ONCE(tp->write_seq, val); |
2937 | else if (tp->repair_queue == TCP_RECV_QUEUE) | 2941 | else if (tp->repair_queue == TCP_RECV_QUEUE) |
2938 | WRITE_ONCE(tp->rcv_nxt, val); | 2942 | WRITE_ONCE(tp->rcv_nxt, val); |
2939 | else | 2943 | else |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 66273c8a55c2..549506162dde 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
@@ -28,7 +28,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | |||
28 | 28 | ||
29 | r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - | 29 | r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - |
30 | READ_ONCE(tp->copied_seq), 0); | 30 | READ_ONCE(tp->copied_seq), 0); |
31 | r->idiag_wqueue = tp->write_seq - tp->snd_una; | 31 | r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una; |
32 | } | 32 | } |
33 | if (info) | 33 | if (info) |
34 | tcp_get_info(sk, info); | 34 | tcp_get_info(sk, info); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 39560f482e0b..6be568334848 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
164 | * without appearing to create any others. | 164 | * without appearing to create any others. |
165 | */ | 165 | */ |
166 | if (likely(!tp->repair)) { | 166 | if (likely(!tp->repair)) { |
167 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; | 167 | u32 seq = tcptw->tw_snd_nxt + 65535 + 2; |
168 | if (tp->write_seq == 0) | 168 | |
169 | tp->write_seq = 1; | 169 | if (!seq) |
170 | seq = 1; | ||
171 | WRITE_ONCE(tp->write_seq, seq); | ||
170 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; | 172 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
171 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; | 173 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
172 | } | 174 | } |
@@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
253 | tp->rx_opt.ts_recent = 0; | 255 | tp->rx_opt.ts_recent = 0; |
254 | tp->rx_opt.ts_recent_stamp = 0; | 256 | tp->rx_opt.ts_recent_stamp = 0; |
255 | if (likely(!tp->repair)) | 257 | if (likely(!tp->repair)) |
256 | tp->write_seq = 0; | 258 | WRITE_ONCE(tp->write_seq, 0); |
257 | } | 259 | } |
258 | 260 | ||
259 | inet->inet_dport = usin->sin_port; | 261 | inet->inet_dport = usin->sin_port; |
@@ -291,10 +293,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
291 | 293 | ||
292 | if (likely(!tp->repair)) { | 294 | if (likely(!tp->repair)) { |
293 | if (!tp->write_seq) | 295 | if (!tp->write_seq) |
294 | tp->write_seq = secure_tcp_seq(inet->inet_saddr, | 296 | WRITE_ONCE(tp->write_seq, |
295 | inet->inet_daddr, | 297 | secure_tcp_seq(inet->inet_saddr, |
296 | inet->inet_sport, | 298 | inet->inet_daddr, |
297 | usin->sin_port); | 299 | inet->inet_sport, |
300 | usin->sin_port)); | ||
298 | tp->tsoffset = secure_tcp_ts_off(sock_net(sk), | 301 | tp->tsoffset = secure_tcp_ts_off(sock_net(sk), |
299 | inet->inet_saddr, | 302 | inet->inet_saddr, |
300 | inet->inet_daddr); | 303 | inet->inet_daddr); |
@@ -2461,7 +2464,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) | |||
2461 | seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " | 2464 | seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " |
2462 | "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", | 2465 | "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", |
2463 | i, src, srcp, dest, destp, state, | 2466 | i, src, srcp, dest, destp, state, |
2464 | tp->write_seq - tp->snd_una, | 2467 | READ_ONCE(tp->write_seq) - tp->snd_una, |
2465 | rx_queue, | 2468 | rx_queue, |
2466 | timer_active, | 2469 | timer_active, |
2467 | jiffies_delta_to_clock_t(timer_expires - jiffies), | 2470 | jiffies_delta_to_clock_t(timer_expires - jiffies), |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c4731d26ab4a..339944690329 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -498,7 +498,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, | |||
498 | newtp->total_retrans = req->num_retrans; | 498 | newtp->total_retrans = req->num_retrans; |
499 | 499 | ||
500 | tcp_init_xmit_timers(newsk); | 500 | tcp_init_xmit_timers(newsk); |
501 | newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; | 501 | WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); |
502 | 502 | ||
503 | if (sock_flag(newsk, SOCK_KEEPOPEN)) | 503 | if (sock_flag(newsk, SOCK_KEEPOPEN)) |
504 | inet_csk_reset_keepalive_timer(newsk, | 504 | inet_csk_reset_keepalive_timer(newsk, |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7dda12720169..c17c2a78809d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1196,7 +1196,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
1196 | struct tcp_sock *tp = tcp_sk(sk); | 1196 | struct tcp_sock *tp = tcp_sk(sk); |
1197 | 1197 | ||
1198 | /* Advance write_seq and place onto the write_queue. */ | 1198 | /* Advance write_seq and place onto the write_queue. */ |
1199 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; | 1199 | WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq); |
1200 | __skb_header_release(skb); | 1200 | __skb_header_release(skb); |
1201 | tcp_add_write_queue_tail(sk, skb); | 1201 | tcp_add_write_queue_tail(sk, skb); |
1202 | sk->sk_wmem_queued += skb->truesize; | 1202 | sk->sk_wmem_queued += skb->truesize; |
@@ -3449,7 +3449,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
3449 | __skb_header_release(skb); | 3449 | __skb_header_release(skb); |
3450 | sk->sk_wmem_queued += skb->truesize; | 3450 | sk->sk_wmem_queued += skb->truesize; |
3451 | sk_mem_charge(sk, skb->truesize); | 3451 | sk_mem_charge(sk, skb->truesize); |
3452 | tp->write_seq = tcb->end_seq; | 3452 | WRITE_ONCE(tp->write_seq, tcb->end_seq); |
3453 | tp->packets_out += tcp_skb_pcount(skb); | 3453 | tp->packets_out += tcp_skb_pcount(skb); |
3454 | } | 3454 | } |
3455 | 3455 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a62c7042fc4a..4804b6dc5e65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
215 | !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { | 215 | !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { |
216 | tp->rx_opt.ts_recent = 0; | 216 | tp->rx_opt.ts_recent = 0; |
217 | tp->rx_opt.ts_recent_stamp = 0; | 217 | tp->rx_opt.ts_recent_stamp = 0; |
218 | tp->write_seq = 0; | 218 | WRITE_ONCE(tp->write_seq, 0); |
219 | } | 219 | } |
220 | 220 | ||
221 | sk->sk_v6_daddr = usin->sin6_addr; | 221 | sk->sk_v6_daddr = usin->sin6_addr; |
@@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
311 | 311 | ||
312 | if (likely(!tp->repair)) { | 312 | if (likely(!tp->repair)) { |
313 | if (!tp->write_seq) | 313 | if (!tp->write_seq) |
314 | tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, | 314 | WRITE_ONCE(tp->write_seq, |
315 | sk->sk_v6_daddr.s6_addr32, | 315 | secure_tcpv6_seq(np->saddr.s6_addr32, |
316 | inet->inet_sport, | 316 | sk->sk_v6_daddr.s6_addr32, |
317 | inet->inet_dport); | 317 | inet->inet_sport, |
318 | inet->inet_dport)); | ||
318 | tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), | 319 | tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), |
319 | np->saddr.s6_addr32, | 320 | np->saddr.s6_addr32, |
320 | sk->sk_v6_daddr.s6_addr32); | 321 | sk->sk_v6_daddr.s6_addr32); |
@@ -1907,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) | |||
1907 | dest->s6_addr32[0], dest->s6_addr32[1], | 1908 | dest->s6_addr32[0], dest->s6_addr32[1], |
1908 | dest->s6_addr32[2], dest->s6_addr32[3], destp, | 1909 | dest->s6_addr32[2], dest->s6_addr32[3], destp, |
1909 | state, | 1910 | state, |
1910 | tp->write_seq - tp->snd_una, | 1911 | READ_ONCE(tp->write_seq) - tp->snd_una, |
1911 | rx_queue, | 1912 | rx_queue, |
1912 | timer_active, | 1913 | timer_active, |
1913 | jiffies_delta_to_clock_t(timer_expires - jiffies), | 1914 | jiffies_delta_to_clock_t(timer_expires - jiffies), |