aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-05-22 00:51:19 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-22 13:46:06 -0400
commitd654976cbf852ee20612ee10dbe57cdacda9f452 (patch)
tree22f0214407da7ba40cd381c1277bd0be52f88816 /net
parentc78e1746d3ad7d548bdf3fe491898cc453911a49 (diff)
tcp: fix a potential deadlock in tcp_get_info()
Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. [ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [<ffffffff81669d44>] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [<ffffffff816bcb75>] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [<ffffffff811126bb>] lock_acquire+0xbb/0x270 [ 523.802565] [<ffffffff816f5868>] _raw_spin_lock+0x38/0x50 [ 523.808628] [<ffffffff81665af8>] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [<ffffffff816819db>] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [<ffffffff81684d41>] tcp_check_req+0x3c1/0x500 [ 523.828199] [<ffffffff81682d09>] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [<ffffffff816842fe>] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [<ffffffff81658fa3>] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [<ffffffff81659a9a>] ip_local_deliver+0xaa/0xc0 [ 523.853440] [<ffffffff816593b8>] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [<ffffffff81659db7>] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_fastopen.c4
-rw-r--r--net/ipv4/tcp_input.c4
3 files changed, 15 insertions, 4 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46efa03d2b11..f1377f2a0472 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -402,6 +402,7 @@ void tcp_init_sock(struct sock *sk)
402 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 402 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
403 tp->snd_cwnd_clamp = ~0; 403 tp->snd_cwnd_clamp = ~0;
404 tp->mss_cache = TCP_MSS_DEFAULT; 404 tp->mss_cache = TCP_MSS_DEFAULT;
405 u64_stats_init(&tp->syncp);
405 406
406 tp->reordering = sysctl_tcp_reordering; 407 tp->reordering = sysctl_tcp_reordering;
407 tcp_enable_early_retrans(tp); 408 tcp_enable_early_retrans(tp);
@@ -2598,6 +2599,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2598 const struct tcp_sock *tp = tcp_sk(sk); 2599 const struct tcp_sock *tp = tcp_sk(sk);
2599 const struct inet_connection_sock *icsk = inet_csk(sk); 2600 const struct inet_connection_sock *icsk = inet_csk(sk);
2600 u32 now = tcp_time_stamp; 2601 u32 now = tcp_time_stamp;
2602 unsigned int start;
2601 u32 rate; 2603 u32 rate;
2602 2604
2603 memset(info, 0, sizeof(*info)); 2605 memset(info, 0, sizeof(*info));
@@ -2665,10 +2667,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2665 rate = READ_ONCE(sk->sk_max_pacing_rate); 2667 rate = READ_ONCE(sk->sk_max_pacing_rate);
2666 info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; 2668 info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
2667 2669
2668 spin_lock_bh(&sk->sk_lock.slock); 2670 do {
2669 info->tcpi_bytes_acked = tp->bytes_acked; 2671 start = u64_stats_fetch_begin_irq(&tp->syncp);
2670 info->tcpi_bytes_received = tp->bytes_received; 2672 info->tcpi_bytes_acked = tp->bytes_acked;
2671 spin_unlock_bh(&sk->sk_lock.slock); 2673 info->tcpi_bytes_received = tp->bytes_received;
2674 } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
2672} 2675}
2673EXPORT_SYMBOL_GPL(tcp_get_info); 2676EXPORT_SYMBOL_GPL(tcp_get_info);
2674 2677
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 3c673d5e6cff..46b087a27503 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -206,6 +206,10 @@ static bool tcp_fastopen_create_child(struct sock *sk,
206 skb_set_owner_r(skb2, child); 206 skb_set_owner_r(skb2, child);
207 __skb_queue_tail(&child->sk_receive_queue, skb2); 207 __skb_queue_tail(&child->sk_receive_queue, skb2);
208 tp->syn_data_acked = 1; 208 tp->syn_data_acked = 1;
209
210 /* u64_stats_update_begin(&tp->syncp) not needed here,
211 * as we certainly are not changing upper 32bit value (0)
212 */
209 tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; 213 tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
210 } else { 214 } else {
211 end_seq = TCP_SKB_CB(skb)->seq + 1; 215 end_seq = TCP_SKB_CB(skb)->seq + 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 243d674b3ef5..c9ab964189a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3286,7 +3286,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3286{ 3286{
3287 u32 delta = ack - tp->snd_una; 3287 u32 delta = ack - tp->snd_una;
3288 3288
3289 u64_stats_update_begin(&tp->syncp);
3289 tp->bytes_acked += delta; 3290 tp->bytes_acked += delta;
3291 u64_stats_update_end(&tp->syncp);
3290 tp->snd_una = ack; 3292 tp->snd_una = ack;
3291} 3293}
3292 3294
@@ -3295,7 +3297,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3295{ 3297{
3296 u32 delta = seq - tp->rcv_nxt; 3298 u32 delta = seq - tp->rcv_nxt;
3297 3299
3300 u64_stats_update_begin(&tp->syncp);
3298 tp->bytes_received += delta; 3301 tp->bytes_received += delta;
3302 u64_stats_update_end(&tp->syncp);
3299 tp->rcv_nxt = seq; 3303 tp->rcv_nxt = seq;
3300} 3304}
3301 3305