aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2019-10-10 23:17:39 -0400
committerDavid S. Miller <davem@davemloft.net>2019-10-13 13:13:08 -0400
commitdba7d9b8c739df27ff3a234c81d6c6b23e3986fa (patch)
tree2550420425234768ffbfcdbe2295d1a8b045dfe5
parentd983ea6f16b835dcde2ee9a58a1e764ce68bfccc (diff)
tcp: annotate tp->rcv_nxt lockless reads
There are few places where we fetch tp->rcv_nxt while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Note that tcp_inq_hint() was already using READ_ONCE(tp->rcv_nxt) syzbot reported : BUG: KCSAN: data-race in tcp_poll / tcp_queue_rcv write to 0xffff888120425770 of 4 bytes by interrupt on cpu 0: tcp_rcv_nxt_update net/ipv4/tcp_input.c:3365 [inline] tcp_queue_rcv+0x180/0x380 net/ipv4/tcp_input.c:4638 tcp_rcv_established+0xbf1/0xf50 net/ipv4/tcp_input.c:5616 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1542 tcp_v4_rcv+0x1a03/0x1bf0 net/ipv4/tcp_ipv4.c:1923 ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208 napi_skb_finish net/core/dev.c:5671 [inline] napi_gro_receive+0x28f/0x330 net/core/dev.c:5704 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061 read to 0xffff888120425770 of 4 bytes by task 7254 on cpu 1: tcp_stream_is_readable net/ipv4/tcp.c:480 [inline] tcp_poll+0x204/0x6b0 net/ipv4/tcp.c:554 sock_poll+0xed/0x250 net/socket.c:1256 vfs_poll include/linux/poll.h:90 [inline] ep_item_poll.isra.0+0x90/0x190 fs/eventpoll.c:892 ep_send_events_proc+0x113/0x5c0 fs/eventpoll.c:1749 ep_scan_ready_list.constprop.0+0x189/0x500 fs/eventpoll.c:704 ep_send_events fs/eventpoll.c:1793 [inline] ep_poll+0xe3/0x900 fs/eventpoll.c:1930 do_epoll_wait+0x162/0x180 fs/eventpoll.c:2294 __do_sys_epoll_pwait fs/eventpoll.c:2325 [inline] __se_sys_epoll_pwait fs/eventpoll.c:2311 [inline] __x64_sys_epoll_pwait+0xcd/0x170 fs/eventpoll.c:2311 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 7254 Comm: syz-fuzzer Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_input.c6
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv6/tcp_ipv6.c3
6 files changed, 15 insertions, 10 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c59d0bd29c5c..883ee863db43 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -477,7 +477,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
477static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, 477static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
478 int target, struct sock *sk) 478 int target, struct sock *sk)
479{ 479{
480 return (tp->rcv_nxt - tp->copied_seq >= target) || 480 return (READ_ONCE(tp->rcv_nxt) - tp->copied_seq >= target) ||
481 (sk->sk_prot->stream_memory_read ? 481 (sk->sk_prot->stream_memory_read ?
482 sk->sk_prot->stream_memory_read(sk) : false); 482 sk->sk_prot->stream_memory_read(sk) : false);
483} 483}
@@ -2935,7 +2935,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2935 else if (tp->repair_queue == TCP_SEND_QUEUE) 2935 else if (tp->repair_queue == TCP_SEND_QUEUE)
2936 tp->write_seq = val; 2936 tp->write_seq = val;
2937 else if (tp->repair_queue == TCP_RECV_QUEUE) 2937 else if (tp->repair_queue == TCP_RECV_QUEUE)
2938 tp->rcv_nxt = val; 2938 WRITE_ONCE(tp->rcv_nxt, val);
2939 else 2939 else
2940 err = -EINVAL; 2940 err = -EINVAL;
2941 break; 2941 break;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 81a8221d650a..cd219161f106 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -26,7 +26,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
26 } else if (sk->sk_type == SOCK_STREAM) { 26 } else if (sk->sk_type == SOCK_STREAM) {
27 const struct tcp_sock *tp = tcp_sk(sk); 27 const struct tcp_sock *tp = tcp_sk(sk);
28 28
29 r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 29 r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - tp->copied_seq, 0);
30 r->idiag_wqueue = tp->write_seq - tp->snd_una; 30 r->idiag_wqueue = tp->write_seq - tp->snd_una;
31 } 31 }
32 if (info) 32 if (info)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5f9b102c3b55..5b7c8768ed5f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3362,7 +3362,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3362 3362
3363 sock_owned_by_me((struct sock *)tp); 3363 sock_owned_by_me((struct sock *)tp);
3364 tp->bytes_received += delta; 3364 tp->bytes_received += delta;
3365 tp->rcv_nxt = seq; 3365 WRITE_ONCE(tp->rcv_nxt, seq);
3366} 3366}
3367 3367
3368/* Update our send window. 3368/* Update our send window.
@@ -5932,7 +5932,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5932 /* Ok.. it's good. Set up sequence numbers and 5932 /* Ok.. it's good. Set up sequence numbers and
5933 * move to established. 5933 * move to established.
5934 */ 5934 */
5935 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 5935 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
5936 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; 5936 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5937 5937
5938 /* RFC1323: The window in SYN & SYN/ACK segments is 5938 /* RFC1323: The window in SYN & SYN/ACK segments is
@@ -6035,7 +6035,7 @@ discard:
6035 tp->tcp_header_len = sizeof(struct tcphdr); 6035 tp->tcp_header_len = sizeof(struct tcphdr);
6036 } 6036 }
6037 6037
6038 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 6038 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
6039 tp->copied_seq = tp->rcv_nxt; 6039 tp->copied_seq = tp->rcv_nxt;
6040 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; 6040 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
6041 6041
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ffa366099eb2..5089dd6bee0f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2455,7 +2455,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2455 /* Because we don't lock the socket, 2455 /* Because we don't lock the socket,
2456 * we might find a transient negative value. 2456 * we might find a transient negative value.
2457 */ 2457 */
2458 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2458 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2459 tp->copied_seq, 0);
2459 2460
2460 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2461 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2461 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2462 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 5401dbd39c8f..adc6ce486a38 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -462,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
462 struct tcp_request_sock *treq = tcp_rsk(req); 462 struct tcp_request_sock *treq = tcp_rsk(req);
463 struct inet_connection_sock *newicsk; 463 struct inet_connection_sock *newicsk;
464 struct tcp_sock *oldtp, *newtp; 464 struct tcp_sock *oldtp, *newtp;
465 u32 seq;
465 466
466 if (!newsk) 467 if (!newsk)
467 return NULL; 468 return NULL;
@@ -475,8 +476,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
475 /* Now setup tcp_sock */ 476 /* Now setup tcp_sock */
476 newtp->pred_flags = 0; 477 newtp->pred_flags = 0;
477 478
478 newtp->rcv_wup = newtp->copied_seq = 479 seq = treq->rcv_isn + 1;
479 newtp->rcv_nxt = treq->rcv_isn + 1; 480 newtp->rcv_wup = seq;
481 newtp->copied_seq = seq;
482 WRITE_ONCE(newtp->rcv_nxt, seq);
480 newtp->segs_in = 1; 483 newtp->segs_in = 1;
481 484
482 newtp->snd_sml = newtp->snd_una = 485 newtp->snd_sml = newtp->snd_una =
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 45a95e032bdf..89ea0a7018b5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1895,7 +1895,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1895 /* Because we don't lock the socket, 1895 /* Because we don't lock the socket,
1896 * we might find a transient negative value. 1896 * we might find a transient negative value.
1897 */ 1897 */
1898 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 1898 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1899 tp->copied_seq, 0);
1899 1900
1900 seq_printf(seq, 1901 seq_printf(seq,
1901 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1902 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "