diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /net/unix | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) |
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.
RCU conversion is pretty much needed :
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
"struct socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to wake up tasks only if there is a sleeper.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c and af_unix use an embedded "struct socket_wq"
instead of a dynamically allocated one. They don't need RCU freeing.
Some cleanups or follow-ups are probably needed (for example, a possible
sk_callback_lock conversion to a spinlock).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix')
-rw-r--r-- | net/unix/af_unix.c | 17 |
1 files changed, 8 insertions, 9 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 87c0360eaa2..fef2cc5e9d2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk) | |||
313 | 313 | ||
314 | static void unix_write_space(struct sock *sk) | 314 | static void unix_write_space(struct sock *sk) |
315 | { | 315 | { |
316 | read_lock(&sk->sk_callback_lock); | 316 | struct socket_wq *wq; |
317 | |||
318 | rcu_read_lock(); | ||
317 | if (unix_writable(sk)) { | 319 | if (unix_writable(sk)) { |
318 | if (sk_has_sleeper(sk)) | 320 | wq = rcu_dereference(sk->sk_wq); |
319 | wake_up_interruptible_sync(sk_sleep(sk)); | 321 | if (wq_has_sleeper(wq)) |
322 | wake_up_interruptible_sync(&wq->wait); | ||
320 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | 323 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); |
321 | } | 324 | } |
322 | read_unlock(&sk->sk_callback_lock); | 325 | rcu_read_unlock(); |
323 | } | 326 | } |
324 | 327 | ||
325 | /* When dgram socket disconnects (or changes its peer), we clear its receive | 328 | /* When dgram socket disconnects (or changes its peer), we clear its receive |
@@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion) | |||
406 | skpair->sk_err = ECONNRESET; | 409 | skpair->sk_err = ECONNRESET; |
407 | unix_state_unlock(skpair); | 410 | unix_state_unlock(skpair); |
408 | skpair->sk_state_change(skpair); | 411 | skpair->sk_state_change(skpair); |
409 | read_lock(&skpair->sk_callback_lock); | ||
410 | sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); | 412 | sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); |
411 | read_unlock(&skpair->sk_callback_lock); | ||
412 | } | 413 | } |
413 | sock_put(skpair); /* It may now die */ | 414 | sock_put(skpair); /* It may now die */ |
414 | unix_peer(sk) = NULL; | 415 | unix_peer(sk) = NULL; |
@@ -1142,7 +1143,7 @@ restart: | |||
1142 | newsk->sk_peercred.pid = task_tgid_vnr(current); | 1143 | newsk->sk_peercred.pid = task_tgid_vnr(current); |
1143 | current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); | 1144 | current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); |
1144 | newu = unix_sk(newsk); | 1145 | newu = unix_sk(newsk); |
1145 | newsk->sk_sleep = &newu->peer_wait; | 1146 | newsk->sk_wq = &newu->peer_wq; |
1146 | otheru = unix_sk(other); | 1147 | otheru = unix_sk(other); |
1147 | 1148 | ||
1148 | /* copy address information from listening to new sock*/ | 1149 | /* copy address information from listening to new sock*/ |
@@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode) | |||
1931 | other->sk_shutdown |= peer_mode; | 1932 | other->sk_shutdown |= peer_mode; |
1932 | unix_state_unlock(other); | 1933 | unix_state_unlock(other); |
1933 | other->sk_state_change(other); | 1934 | other->sk_state_change(other); |
1934 | read_lock(&other->sk_callback_lock); | ||
1935 | if (peer_mode == SHUTDOWN_MASK) | 1935 | if (peer_mode == SHUTDOWN_MASK) |
1936 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); | 1936 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); |
1937 | else if (peer_mode & RCV_SHUTDOWN) | 1937 | else if (peer_mode & RCV_SHUTDOWN) |
1938 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); | 1938 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); |
1939 | read_unlock(&other->sk_callback_lock); | ||
1940 | } | 1939 | } |
1941 | if (other) | 1940 | if (other) |
1942 | sock_put(other); | 1941 | sock_put(other); |