diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-09-22 08:43:39 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-09-25 01:26:10 -0400 |
commit | f064af1e500a2bf4607706f0f458163bdb2a6ea5 (patch) | |
tree | ebbbd1c3c00030c70cab6fe587a5c03252a46ebc /net/rds | |
parent | 605c82bab5abe0816e5e32716875c245f89f39da (diff) |
net: fix a lockdep splat
We have for each socket :
One spinlock (sk_slock.slock)
One rwlock (sk_callback_lock)
Possible scenarios are :
(A) (this is used in net/sunrpc/xprtsock.c)
read_lock(&sk->sk_callback_lock) (without blocking BH)
<BH>
spin_lock(&sk->sk_slock.slock);
...
read_lock(&sk->sk_callback_lock);
...
(B)
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
(C)
spin_lock_bh(&sk->sk_slock)
...
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
spin_unlock_bh(&sk->sk_slock)
This (C) case conflicts with (A) :
CPU1 [A] CPU2 [C]
read_lock(callback_lock)
<BH> spin_lock_bh(slock)
<wait to spin_lock(slock)>
<wait to write_lock_bh(callback_lock)>
We have one problematic (C) use case in inet_csk_listen_stop() :
local_bh_disable();
bh_lock_sock(child); // spin_lock_bh(&sk->sk_slock)
WARN_ON(sock_owned_by_user(child));
...
sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock)
lockdep is not happy with this, as reported by Tetsuo Handa
It seems only way to deal with this is to use read_lock_bh(callbacklock)
everywhere.
Thanks to Jarek for pointing a bug in my first attempt and suggesting
this solution.
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/tcp_connect.c | 4 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 4 | ||||
-rw-r--r-- | net/rds/tcp_recv.c | 4 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 4 |
4 files changed, 8 insertions, 8 deletions
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index c397524c039c..c519939e8da9 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c | |||
@@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) | |||
43 | struct rds_connection *conn; | 43 | struct rds_connection *conn; |
44 | struct rds_tcp_connection *tc; | 44 | struct rds_tcp_connection *tc; |
45 | 45 | ||
46 | read_lock(&sk->sk_callback_lock); | 46 | read_lock_bh(&sk->sk_callback_lock); |
47 | conn = sk->sk_user_data; | 47 | conn = sk->sk_user_data; |
48 | if (conn == NULL) { | 48 | if (conn == NULL) { |
49 | state_change = sk->sk_state_change; | 49 | state_change = sk->sk_state_change; |
@@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) | |||
68 | break; | 68 | break; |
69 | } | 69 | } |
70 | out: | 70 | out: |
71 | read_unlock(&sk->sk_callback_lock); | 71 | read_unlock_bh(&sk->sk_callback_lock); |
72 | state_change(sk); | 72 | state_change(sk); |
73 | } | 73 | } |
74 | 74 | ||
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 975183fe6950..27844f231d10 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c | |||
@@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) | |||
114 | 114 | ||
115 | rdsdebug("listen data ready sk %p\n", sk); | 115 | rdsdebug("listen data ready sk %p\n", sk); |
116 | 116 | ||
117 | read_lock(&sk->sk_callback_lock); | 117 | read_lock_bh(&sk->sk_callback_lock); |
118 | ready = sk->sk_user_data; | 118 | ready = sk->sk_user_data; |
119 | if (ready == NULL) { /* check for teardown race */ | 119 | if (ready == NULL) { /* check for teardown race */ |
120 | ready = sk->sk_data_ready; | 120 | ready = sk->sk_data_ready; |
@@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) | |||
131 | queue_work(rds_wq, &rds_tcp_listen_work); | 131 | queue_work(rds_wq, &rds_tcp_listen_work); |
132 | 132 | ||
133 | out: | 133 | out: |
134 | read_unlock(&sk->sk_callback_lock); | 134 | read_unlock_bh(&sk->sk_callback_lock); |
135 | ready(sk, bytes); | 135 | ready(sk, bytes); |
136 | } | 136 | } |
137 | 137 | ||
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 1aba6878fa5d..e43797404102 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c | |||
@@ -324,7 +324,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) | |||
324 | 324 | ||
325 | rdsdebug("data ready sk %p bytes %d\n", sk, bytes); | 325 | rdsdebug("data ready sk %p bytes %d\n", sk, bytes); |
326 | 326 | ||
327 | read_lock(&sk->sk_callback_lock); | 327 | read_lock_bh(&sk->sk_callback_lock); |
328 | conn = sk->sk_user_data; | 328 | conn = sk->sk_user_data; |
329 | if (conn == NULL) { /* check for teardown race */ | 329 | if (conn == NULL) { /* check for teardown race */ |
330 | ready = sk->sk_data_ready; | 330 | ready = sk->sk_data_ready; |
@@ -338,7 +338,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) | |||
338 | if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) | 338 | if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) |
339 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | 339 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); |
340 | out: | 340 | out: |
341 | read_unlock(&sk->sk_callback_lock); | 341 | read_unlock_bh(&sk->sk_callback_lock); |
342 | ready(sk, bytes); | 342 | ready(sk, bytes); |
343 | } | 343 | } |
344 | 344 | ||
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index a28b895ff0d1..2f012a07d94d 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c | |||
@@ -224,7 +224,7 @@ void rds_tcp_write_space(struct sock *sk) | |||
224 | struct rds_connection *conn; | 224 | struct rds_connection *conn; |
225 | struct rds_tcp_connection *tc; | 225 | struct rds_tcp_connection *tc; |
226 | 226 | ||
227 | read_lock(&sk->sk_callback_lock); | 227 | read_lock_bh(&sk->sk_callback_lock); |
228 | conn = sk->sk_user_data; | 228 | conn = sk->sk_user_data; |
229 | if (conn == NULL) { | 229 | if (conn == NULL) { |
230 | write_space = sk->sk_write_space; | 230 | write_space = sk->sk_write_space; |
@@ -244,7 +244,7 @@ void rds_tcp_write_space(struct sock *sk) | |||
244 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | 244 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); |
245 | 245 | ||
246 | out: | 246 | out: |
247 | read_unlock(&sk->sk_callback_lock); | 247 | read_unlock_bh(&sk->sk_callback_lock); |
248 | 248 | ||
249 | /* | 249 | /* |
250 | * write_space is only called when data leaves tcp's send queue if | 250 | * write_space is only called when data leaves tcp's send queue if |