author	Eric Dumazet <edumazet@google.com>	2019-10-10 23:17:44 -0400
committer	David S. Miller <davem@davemloft.net>	2019-10-13 13:13:08 -0400
commit	ebb3b78db7bf842270a46fd4fe7cc45c78fa5ed6 (patch)
tree	9a0ecbc667473f785c77851f2db6c31f1fa3d2a4
parent	d9b55bf7b6788ec0bd1db1acefbc4feb1399144a (diff)
tcp: annotate sk->sk_rcvbuf lockless reads
For the sake of tcp_poll(), there are a few places where we fetch
sk->sk_rcvbuf while this field can change from IRQ or another cpu.

We need to add READ_ONCE() annotations, and also make sure write
sides use corresponding WRITE_ONCE() to avoid store-tearing.

Note that other transports probably need similar fixes.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
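As a rough illustration of the pattern this patch applies, the userspace sketch below mimics READ_ONCE()/WRITE_ONCE() with simplified volatile-cast macros. The struct sock_like type and its helpers are hypothetical stand-ins, not kernel code, and the macro bodies only approximate the kernel's real implementation.

/*
 * Minimal userspace sketch of the READ_ONCE()/WRITE_ONCE() pattern.
 * The macros below force a single, untorn access through a volatile
 * cast; struct sock_like is a made-up stand-in for struct sock.
 */
#include <stdio.h>

#define READ_ONCE(x)       (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))

struct sock_like {
	int sk_rcvbuf;     /* receive buffer limit, may change under the reader */
	int sk_rmem_alloc; /* bytes currently queued (atomic_t in the kernel) */
};

/* Reader side, e.g. a poll()-like path running without the socket lock:
 * the annotated load cannot be torn or silently re-read by the compiler. */
static int rcv_space_left(const struct sock_like *sk)
{
	return READ_ONCE(sk->sk_rcvbuf) - sk->sk_rmem_alloc;
}

/* Writer side: pairs with the annotated read to avoid store-tearing. */
static void set_rcvbuf(struct sock_like *sk, int val)
{
	WRITE_ONCE(sk->sk_rcvbuf, val);
}

int main(void)
{
	struct sock_like sk = { .sk_rcvbuf = 87380, .sk_rmem_alloc = 4096 };

	set_rcvbuf(&sk, 2 * 87380);
	printf("space left: %d\n", rcv_space_left(&sk));
	return 0;
}

In the kernel proper the real macros from <linux/compiler.h> are used and sk_rmem_alloc is read with atomic_read(); the sketch only shows why paired READ_ONCE()/WRITE_ONCE() annotations keep the lockless reader coherent.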
-rw-r--r--	include/net/tcp.h	4
-rw-r--r--	include/trace/events/sock.h	2
-rw-r--r--	net/core/filter.c	3
-rw-r--r--	net/core/skbuff.c	2
-rw-r--r--	net/core/sock.c	5
-rw-r--r--	net/ipv4/tcp.c	4
-rw-r--r--	net/ipv4/tcp_input.c	7
7 files changed, 15 insertions, 12 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e1d08f69fd39..ab4eb5eb5d07 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1380,14 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf -
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
 				  READ_ONCE(sk->sk_backlog.len) -
 				  atomic_read(&sk->sk_rmem_alloc));
 }
 
 static inline int tcp_full_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf);
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
 
 extern void tcp_openreq_init_rwin(struct request_sock *req,
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index a0c4b8a30966..f720c32e7dfd 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full,
 	TP_fast_assign(
 		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
 		__entry->truesize = skb->truesize;
-		__entry->sk_rcvbuf = sk->sk_rcvbuf;
+		__entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf);
 	),
 
 	TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
diff --git a/net/core/filter.c b/net/core/filter.c
index a50c0b6846f2..7deceaeeed7b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4252,7 +4252,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	case SO_RCVBUF:
 		val = min_t(u32, val, sysctl_rmem_max);
 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		WRITE_ONCE(sk->sk_rcvbuf,
+			   max_t(int, val * 2, SOCK_MIN_RCVBUF));
 		break;
 	case SO_SNDBUF:
 		val = min_t(u32, val, sysctl_wmem_max);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 529133611ea2..8c178703467b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4415,7 +4415,7 @@ static void skb_set_err_queue(struct sk_buff *skb)
 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 {
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned int)sk->sk_rcvbuf)
+	    (unsigned int)READ_ONCE(sk->sk_rcvbuf))
 		return -ENOMEM;
 
 	skb_orphan(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 2a053999df11..8c8f61e70141 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -831,7 +831,8 @@ set_rcvbuf:
 		 * returning the value we actually used in getsockopt
 		 * is the most desirable behavior.
 		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		WRITE_ONCE(sk->sk_rcvbuf,
+			   max_t(int, val * 2, SOCK_MIN_RCVBUF));
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -3204,7 +3205,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
 	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 577a8c6eef9f..bc0481aa6633 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,7 +451,7 @@ void tcp_init_sock(struct sock *sk)
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
 	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+	WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
 
 	sk_sockets_allocated_inc(sk);
 	sk->sk_route_forced_caps = NETIF_F_GSO;
@@ -1711,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 
 	val <<= 1;
 	if (val > sk->sk_rcvbuf) {
-		sk->sk_rcvbuf = val;
+		WRITE_ONCE(sk->sk_rcvbuf, val);
 		tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
 	}
 	return 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 16342e043ab3..6995df20710a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -483,8 +483,9 @@ static void tcp_clamp_window(struct sock *sk)
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
-		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-				    net->ipv4.sysctl_tcp_rmem[2]);
+		WRITE_ONCE(sk->sk_rcvbuf,
+			   min(atomic_read(&sk->sk_rmem_alloc),
+			       net->ipv4.sysctl_tcp_rmem[2]));
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -648,7 +649,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		rcvbuf = min_t(u64, rcvwin * rcvmem,
 			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 		if (rcvbuf > sk->sk_rcvbuf) {
-			sk->sk_rcvbuf = rcvbuf;
+			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
 
 			/* Make the window clamp follow along. */
 			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);