author     Eric Dumazet <eric.dumazet@gmail.com>    2010-04-29 07:01:49 -0400
committer  David S. Miller <davem@davemloft.net>    2010-05-01 18:00:15 -0400
commit     43815482370c510c569fd18edb57afcb0fa8cab6 (patch)
tree       063efaae3758402b84f056438b704d1de68f7837
parent     83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff)
net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we
need two atomic operations (and the associated cache-line dirtying) per
incoming packet.

An RCU conversion is pretty much needed:
1) Add a new structure, "struct socket_wq", to hold all fields that need
   rcu_read_lock() protection (currently a wait_queue_head_t and a
   struct fasync_struct pointer).
   [A future patch will add a list anchor for wakeup coalescing.]
2) Attach one such structure to each "struct socket" created in
   sock_alloc_inode().
3) Respect the RCU grace period when freeing a "struct socket_wq".
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
   "struct socket_wq".
5) Change the sk_sleep() function to use the new sk->sk_wq instead of
   sk->sk_sleep.
6) Replace sk_has_sleeper() with wq_has_sleeper(), which must be used inside
   an rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to:
   - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
   - Use wq_has_sleeper() to wake up tasks when appropriate
   - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) Modify sock_wake_async() to use RCU protection as well.
9) Exceptions: macvtap, drivers/net/tun.c, and af_unix use an embedded
   "struct socket_wq" instead of a dynamically allocated one, so they don't
   need RCU freeing.
Some cleanups or follow-ups are probably needed (a possible conversion of
sk_callback_lock to a spinlock, for example).
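
For illustration, the wakeup path after this conversion boils down to the
following pattern (a minimal sketch; example_wakeup() is a placeholder name
for this note, not a function added by the patch):

        static void example_wakeup(struct sock *sk)
        {
                struct socket_wq *wq;

                rcu_read_lock();        /* was: read_lock(&sk->sk_callback_lock) */
                wq = rcu_dereference(sk->sk_wq);        /* may be NULL once orphaned */
                if (wq_has_sleeper(wq)) /* NULL-safe; issues the smp_mb() paired with sock_poll_wait() */
                        wake_up_interruptible(&wq->wait);
                rcu_read_unlock();      /* was: read_unlock(&sk->sk_callback_lock) */
        }

The two atomic operations on sk_callback_lock (and the cache-line dirtying
they cause) are thus replaced by rcu_read_lock()/rcu_read_unlock(), which are
essentially free.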
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--   drivers/net/macvtap.c   |  13
-rw-r--r--   drivers/net/tun.c       |  21
-rw-r--r--   include/linux/net.h     |  14
-rw-r--r--   include/net/af_unix.h   |  20
-rw-r--r--   include/net/sock.h      |  38
-rw-r--r--   net/atm/common.c        |  22
-rw-r--r--   net/core/sock.c         |  50
-rw-r--r--   net/core/stream.c       |  10
-rw-r--r--   net/dccp/output.c       |  10
-rw-r--r--   net/iucv/af_iucv.c      |  11
-rw-r--r--   net/phonet/pep.c        |   8
-rw-r--r--   net/phonet/socket.c     |   2
-rw-r--r--   net/rxrpc/af_rxrpc.c    |  10
-rw-r--r--   net/sctp/socket.c       |   2
-rw-r--r--   net/socket.c            |  47
-rw-r--r--   net/unix/af_unix.c      |  17

16 files changed, 181 insertions(+), 114 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index d97e1fd234ba..1c4110df343e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -37,6 +37,7 @@
 struct macvtap_queue {
         struct sock sk;
         struct socket sock;
+        struct socket_wq wq;
         struct macvlan_dev *vlan;
         struct file *file;
         unsigned int flags;
@@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
 
 static void macvtap_sock_write_space(struct sock *sk)
 {
+        wait_queue_head_t *wqueue;
+
         if (!sock_writeable(sk) ||
             !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
                 return;
 
-        if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-                wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND);
+        wqueue = sk_sleep(sk);
+        if (wqueue && waitqueue_active(wqueue))
+                wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
@@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
         if (!q)
                 goto out;
 
-        init_waitqueue_head(&q->sock.wait);
+        q->sock.wq = &q->wq;
+        init_waitqueue_head(&q->wq.wait);
         q->sock.type = SOCK_RAW;
         q->sock.state = SS_CONNECTED;
         q->sock.file = file;
@@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
                 goto out;
 
         mask = 0;
-        poll_wait(file, &q->sock.wait, wait);
+        poll_wait(file, &q->wq.wait, wait);
 
         if (!skb_queue_empty(&q->sk.sk_receive_queue))
                 mask |= POLLIN | POLLRDNORM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 20a17938c62b..e525a6cf5587 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,7 +109,7 @@ struct tun_struct {
 
         struct tap_filter       txflt;
         struct socket           socket;
-
+        struct socket_wq        wq;
 #ifdef TUN_DEBUG
         int debug;
 #endif
@@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev)
         /* Inform the methods they need to stop using the dev.
          */
         if (tfile) {
-                wake_up_all(&tun->socket.wait);
+                wake_up_all(&tun->wq.wait);
                 if (atomic_dec_and_test(&tfile->count))
                         __tun_detach(tun);
         }
@@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
         /* Notify and wake up reader process */
         if (tun->flags & TUN_FASYNC)
                 kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-        wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
+        wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
                                    POLLRDNORM | POLLRDBAND);
         return NETDEV_TX_OK;
 
@@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 
         DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
 
-        poll_wait(file, &tun->socket.wait, wait);
+        poll_wait(file, &tun->wq.wait, wait);
 
         if (!skb_queue_empty(&sk->sk_receive_queue))
                 mask |= POLLIN | POLLRDNORM;
@@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
 
         DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
 
-        add_wait_queue(&tun->socket.wait, &wait);
+        add_wait_queue(&tun->wq.wait, &wait);
         while (len) {
                 current->state = TASK_INTERRUPTIBLE;
 
@@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
         }
 
         current->state = TASK_RUNNING;
-        remove_wait_queue(&tun->socket.wait, &wait);
+        remove_wait_queue(&tun->wq.wait, &wait);
 
         return ret;
 }
@@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
 static void tun_sock_write_space(struct sock *sk)
 {
         struct tun_struct *tun;
+        wait_queue_head_t *wqueue;
 
         if (!sock_writeable(sk))
                 return;
@@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk)
         if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
                 return;
 
-        if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-                wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
+        wqueue = sk_sleep(sk);
+        if (wqueue && waitqueue_active(wqueue))
+                wake_up_interruptible_sync_poll(wqueue, POLLOUT |
                                                 POLLWRNORM | POLLWRBAND);
 
         tun = tun_sk(sk)->tun;
@@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                 if (!sk)
                         goto err_free_dev;
 
-                init_waitqueue_head(&tun->socket.wait);
+                tun->socket.wq = &tun->wq;
+                init_waitqueue_head(&tun->wq.wait);
                 tun->socket.ops = &tun_socket_ops;
                 sock_init_data(&tun->socket, sk);
                 sk->sk_write_space = tun_sock_write_space;
diff --git a/include/linux/net.h b/include/linux/net.h
index 4157b5d42bd6..2b4deeeb8646 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -59,6 +59,7 @@ typedef enum {
 #include <linux/wait.h>
 #include <linux/fcntl.h>        /* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/kmemcheck.h>
+#include <linux/rcupdate.h>
 
 struct poll_table_struct;
 struct pipe_inode_info;
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd {
         SHUT_RDWR       = 2,
 };
 
+struct socket_wq {
+        wait_queue_head_t       wait;
+        struct fasync_struct    *fasync_list;
+        struct rcu_head         rcu;
+} ____cacheline_aligned_in_smp;
+
 /**
  *  struct socket - general BSD socket
  *  @state: socket state (%SS_CONNECTED, etc)
@@ -135,11 +142,8 @@ struct socket {
         kmemcheck_bitfield_end(type);
 
         unsigned long           flags;
-        /*
-         * Please keep fasync_list & wait fields in the same cache line
-         */
-        struct fasync_struct    *fasync_list;
-        wait_queue_head_t       wait;
+
+        struct socket_wq        *wq;
 
         struct file             *file;
         struct sock             *sk;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 1614d78c60ed..20725e213aee 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -30,7 +30,7 @@ struct unix_skb_parms {
 #endif
 };
 
-#define UNIXCB(skb)     (*(struct unix_skb_parms*)&((skb)->cb))
+#define UNIXCB(skb)     (*(struct unix_skb_parms *)&((skb)->cb))
 #define UNIXCREDS(skb)  (&UNIXCB((skb)).creds)
 #define UNIXSID(skb)    (&UNIXCB((skb)).secid)
 
@@ -45,21 +45,23 @@ struct unix_skb_parms {
 struct unix_sock {
         /* WARNING: sk has to be the first member */
         struct sock             sk;
         struct unix_address     *addr;
         struct dentry           *dentry;
         struct vfsmount         *mnt;
         struct mutex            readlock;
         struct sock             *peer;
         struct sock             *other;
         struct list_head        link;
         atomic_long_t           inflight;
         spinlock_t              lock;
         unsigned int            gc_candidate : 1;
         unsigned int            gc_maybe_cycle : 1;
-        wait_queue_head_t       peer_wait;
+        struct socket_wq        peer_wq;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
 
+#define peer_wait peer_wq.wait
+
 #ifdef CONFIG_SYSCTL
 extern int unix_sysctl_register(struct net *net);
 extern void unix_sysctl_unregister(struct net *net);
diff --git a/include/net/sock.h b/include/net/sock.h
index e1777db5b9ab..cc7f91ec972c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -159,7 +159,7 @@ struct sock_common {
   *     @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *     @sk_lock:       synchronizer
   *     @sk_rcvbuf: size of receive buffer in bytes
-  *     @sk_sleep: sock wait queue
+  *     @sk_wq: sock wait queue and async head
   *     @sk_dst_cache: destination cache
   *     @sk_dst_lock: destination cache lock
   *     @sk_policy: flow policy
@@ -257,7 +257,7 @@ struct sock {
                 struct sk_buff *tail;
                 int len;
         } sk_backlog;
-        wait_queue_head_t       *sk_sleep;
+        struct socket_wq        *sk_wq;
         struct dst_entry        *sk_dst_cache;
 #ifdef CONFIG_XFRM
         struct xfrm_policy      *sk_policy[2];
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 
 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
 {
-        return sk->sk_sleep;
+        return &sk->sk_wq->wait;
 }
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
         write_lock_bh(&sk->sk_callback_lock);
         sock_set_flag(sk, SOCK_DEAD);
         sk_set_socket(sk, NULL);
-        sk->sk_sleep = NULL;
+        sk->sk_wq = NULL;
         write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
         write_lock_bh(&sk->sk_callback_lock);
-        sk->sk_sleep = &parent->wait;
+        rcu_assign_pointer(sk->sk_wq, parent->wq);
         parent->sk = sk;
         sk_set_socket(sk, parent);
         security_sock_graft(sk, parent);
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @sk: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                 ...
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
- *   schedule               ...
- *                          if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
- *                             wake_up_interruptible(sk_sleep(sk))
+ *   schedule               rcu_read_lock();
+ *                          wq = rcu_dereference(sk->sk_wq);
+ *                          if (wq && waitqueue_active(&wq->wait))
+ *                             wake_up_interruptible(&wq->wait)
  *   ...
  *   }
  *
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
 * could then endup calling schedule and sleep forever if there are no more
 * data on the socket.
 *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
 */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
         /*
          * We need to be sure we are in sync with the
          * add_wait_queue modifications to the wait queue.
          *
          * This memory barrier is paired in the sock_poll_wait.
          */
-        smp_mb__after_lock();
-        return sk_sleep(sk) && waitqueue_active(sk_sleep(sk));
+        smp_mb();
+        return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address:   socket wait queue
  * @p:              poll_table
  *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
                 wait_queue_head_t *wait_address, poll_table *p)
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
                  * We need to be sure we are in sync with the
                  * socket flags modification.
                  *
-                 * This memory barrier is paired in the sk_has_sleeper.
+                 * This memory barrier is paired in the wq_has_sleeper.
                  */
                 smp_mb();
         }
diff --git a/net/atm/common.c b/net/atm/common.c
index e3e10e6f8628..b43feb1a3995 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk)
 
 static void vcc_def_wakeup(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
-        if (sk_has_sleeper(sk))
-                wake_up(sk_sleep(sk));
-        read_unlock(&sk->sk_callback_lock);
+        struct socket_wq *wq;
+
+        rcu_read_lock();
+        wq = rcu_dereference(sk->sk_wq);
+        if (wq_has_sleeper(wq))
+                wake_up(&wq->wait);
+        rcu_read_unlock();
 }
 
 static inline int vcc_writable(struct sock *sk)
@@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk)
 
 static void vcc_write_space(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
+        struct socket_wq *wq;
+
+        rcu_read_lock();
 
         if (vcc_writable(sk)) {
-                if (sk_has_sleeper(sk))
-                        wake_up_interruptible(sk_sleep(sk));
+                wq = rcu_dereference(sk->sk_wq);
+                if (wq_has_sleeper(wq))
+                        wake_up_interruptible(&wq->wait);
 
                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
         }
 
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 static struct proto vcc_proto = {
diff --git a/net/core/sock.c b/net/core/sock.c
index 51041759517e..94c4affdda9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                  */
                 sk_refcnt_debug_inc(newsk);
                 sk_set_socket(newsk, NULL);
-                newsk->sk_sleep = NULL;
+                newsk->sk_wq = NULL;
 
                 if (newsk->sk_prot->sockets_allocated)
                         percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
-        if (sk_has_sleeper(sk))
-                wake_up_interruptible_all(sk_sleep(sk));
-        read_unlock(&sk->sk_callback_lock);
+        struct socket_wq *wq;
+
+        rcu_read_lock();
+        wq = rcu_dereference(sk->sk_wq);
+        if (wq_has_sleeper(wq))
+                wake_up_interruptible_all(&wq->wait);
+        rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
-        if (sk_has_sleeper(sk))
-                wake_up_interruptible_poll(sk_sleep(sk), POLLERR);
+        struct socket_wq *wq;
+
+        rcu_read_lock();
+        wq = rcu_dereference(sk->sk_wq);
+        if (wq_has_sleeper(wq))
+                wake_up_interruptible_poll(&wq->wait, POLLERR);
         sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-        read_lock(&sk->sk_callback_lock);
-        if (sk_has_sleeper(sk))
-                wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN |
+        struct socket_wq *wq;
+
+        rcu_read_lock();
+        wq = rcu_dereference(sk->sk_wq);
+        if (wq_has_sleeper(wq))
+                wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
                                                 POLLRDNORM | POLLRDBAND);
         sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
+        struct socket_wq *wq;
+
+        rcu_read_lock();
 
         /* Do not wake up a writer until he can make "significant"
          * progress.  --DaveM
          */
         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-                if (sk_has_sleeper(sk))
-                        wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
+                wq = rcu_dereference(sk->sk_wq);
+                if (wq_has_sleeper(wq))
+                        wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
                                                 POLLWRNORM | POLLWRBAND);
 
                 /* Should agree with poll, otherwise some programs break */
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
                         sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
         }
 
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
         if (sock) {
                 sk->sk_type = sock->type;
-                sk->sk_sleep = &sock->wait;
+                sk->sk_wq = sock->wq;
                 sock->sk = sk;
         } else
-                sk->sk_sleep = NULL;
+                sk->sk_wq = NULL;
 
         spin_lock_init(&sk->sk_dst_lock);
         rwlock_init(&sk->sk_callback_lock);
diff --git a/net/core/stream.c b/net/core/stream.c
index 7b3c3f30b107..cc196f42b8d8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,15 +28,19 @@
 void sk_stream_write_space(struct sock *sk)
 {
         struct socket *sock = sk->sk_socket;
+        struct socket_wq *wq;
 
         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
                 clear_bit(SOCK_NOSPACE, &sock->flags);
 
-                if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-                        wake_up_interruptible_poll(sk_sleep(sk), POLLOUT |
+                rcu_read_lock();
+                wq = rcu_dereference(sk->sk_wq);
+                if (wq_has_sleeper(wq))
+                        wake_up_interruptible_poll(&wq->wait, POLLOUT |
                                                 POLLWRNORM | POLLWRBAND);
-                if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+                if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
                         sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+                rcu_read_unlock();
         }
 }
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2d3dcb39851f..aadbdb58758b 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss);
 
 void dccp_write_space(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
+        struct socket_wq *wq;
 
-        if (sk_has_sleeper(sk))
-                wake_up_interruptible(sk_sleep(sk));
+        rcu_read_lock();
+        wq = rcu_dereference(sk->sk_wq);
+        if (wq_has_sleeper(wq))
+                wake_up_interruptible(&wq->wait);
         /* Should agree with poll, otherwise some programs break */
         if (sock_writeable(sk))
                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 /**
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9636b7d27b48..8be324fe08b9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -305,11 +305,14 @@ static inline int iucv_below_msglim(struct sock *sk)
  */
 static void iucv_sock_wake_msglim(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
-        if (sk_has_sleeper(sk))
-                wake_up_interruptible_all(sk_sleep(sk));
+        struct socket_wq *wq;
+
+        rcu_read_lock();
+        wq = rcu_dereference(sk->sk_wq);
+        if (wq_has_sleeper(wq))
+                wake_up_interruptible_all(&wq->wait);
         sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 /* Timers */
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e2a95762abd3..af4d38bc3b22 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock)
                 if (signal_pending(tsk))
                         return sock_intr_errno(timeo);
 
-                prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait,
+                prepare_to_wait_exclusive(sk_sleep(sk), &wait,
                                                 TASK_INTERRUPTIBLE);
                 release_sock(sk);
                 timeo = schedule_timeout(timeo);
                 lock_sock(sk);
-                finish_wait(&sk->sk_socket->wait, &wait);
+                finish_wait(sk_sleep(sk), &wait);
         }
 
         return 0;
@@ -910,10 +910,10 @@ disabled:
                         goto out;
                 }
 
-                prepare_to_wait(&sk->sk_socket->wait, &wait,
+                prepare_to_wait(sk_sleep(sk), &wait,
                                 TASK_INTERRUPTIBLE);
                 done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
-                finish_wait(&sk->sk_socket->wait, &wait);
+                finish_wait(sk_sleep(sk), &wait);
 
                 if (sk->sk_state != TCP_ESTABLISHED)
                         goto disabled;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c785bfd0744f..6e9848bf0370 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
         struct pep_sock *pn = pep_sk(sk);
         unsigned int mask = 0;
 
-        poll_wait(file, &sock->wait, wait);
+        poll_wait(file, sk_sleep(sk), wait);
 
         switch (sk->sk_state) {
         case TCP_LISTEN:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c432d76f415e..0b9bb2085ce4 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk)
 static void rxrpc_write_space(struct sock *sk)
 {
         _enter("%p", sk);
-        read_lock(&sk->sk_callback_lock);
+        rcu_read_lock();
         if (rxrpc_writable(sk)) {
-                if (sk_has_sleeper(sk))
-                        wake_up_interruptible(sk_sleep(sk));
+                struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+                if (wq_has_sleeper(wq))
+                        wake_up_interruptible(&wq->wait);
                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
         }
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13d8229f3a9c..d54700af927a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6065,7 +6065,7 @@ static void __sctp_write_space(struct sctp_association *asoc)
                          * here by modeling from the current TCP/UDP code.
                          * We have not tested with it yet.
                          */
-                        if (sock->fasync_list &&
+                        if (sock->wq->fasync_list &&
                             !(sk->sk_shutdown & SEND_SHUTDOWN))
                                 sock_wake_async(sock,
                                                 SOCK_WAKE_SPACE, POLL_OUT);
diff --git a/net/socket.c b/net/socket.c
index cb7c1f6c0d6e..dae8c6b84a09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
         ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
         if (!ei)
                 return NULL;
-        init_waitqueue_head(&ei->socket.wait);
+        ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
+        if (!ei->socket.wq) {
+                kmem_cache_free(sock_inode_cachep, ei);
+                return NULL;
+        }
+        init_waitqueue_head(&ei->socket.wq->wait);
+        ei->socket.wq->fasync_list = NULL;
 
-        ei->socket.fasync_list = NULL;
         ei->socket.state = SS_UNCONNECTED;
         ei->socket.flags = 0;
         ei->socket.ops = NULL;
@@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
         return &ei->vfs_inode;
 }
 
+
+static void wq_free_rcu(struct rcu_head *head)
+{
+        struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+
+        kfree(wq);
+}
+
 static void sock_destroy_inode(struct inode *inode)
 {
-        kmem_cache_free(sock_inode_cachep,
-                        container_of(inode, struct socket_alloc, vfs_inode));
+        struct socket_alloc *ei;
+
+        ei = container_of(inode, struct socket_alloc, vfs_inode);
+        call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
+        kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void init_once(void *foo)
@@ -513,7 +529,7 @@ void sock_release(struct socket *sock)
                 module_put(owner);
         }
 
-        if (sock->fasync_list)
+        if (sock->wq->fasync_list)
                 printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
         percpu_sub(sockets_in_use, 1);
@@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on)
 
         lock_sock(sk);
 
-        fasync_helper(fd, filp, on, &sock->fasync_list);
+        fasync_helper(fd, filp, on, &sock->wq->fasync_list);
 
-        if (!sock->fasync_list)
+        if (!sock->wq->fasync_list)
                 sock_reset_flag(sk, SOCK_FASYNC);
         else
                 sock_set_flag(sk, SOCK_FASYNC);
@@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
         return 0;
 }
 
-/* This function may be called only under socket lock or callback_lock */
+/* This function may be called only under socket lock or callback_lock or rcu_lock */
 
 int sock_wake_async(struct socket *sock, int how, int band)
 {
-        if (!sock || !sock->fasync_list)
+        struct socket_wq *wq;
+
+        if (!sock)
                 return -1;
+        rcu_read_lock();
+        wq = rcu_dereference(sock->wq);
+        if (!wq || !wq->fasync_list) {
+                rcu_read_unlock();
+                return -1;
+        }
         switch (how) {
         case SOCK_WAKE_WAITD:
                 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band)
                 /* fall through */
         case SOCK_WAKE_IO:
 call_kill:
-                kill_fasync(&sock->fasync_list, SIGIO, band);
+                kill_fasync(&wq->fasync_list, SIGIO, band);
                 break;
         case SOCK_WAKE_URG:
-                kill_fasync(&sock->fasync_list, SIGURG, band);
+                kill_fasync(&wq->fasync_list, SIGURG, band);
         }
+        rcu_read_unlock();
         return 0;
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 87c0360eaa25..fef2cc5e9d2b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk)
 
 static void unix_write_space(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
+        struct socket_wq *wq;
+
+        rcu_read_lock();
         if (unix_writable(sk)) {
-                if (sk_has_sleeper(sk))
-                        wake_up_interruptible_sync(sk_sleep(sk));
+                wq = rcu_dereference(sk->sk_wq);
+                if (wq_has_sleeper(wq))
+                        wake_up_interruptible_sync(&wq->wait);
                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
         }
-        read_unlock(&sk->sk_callback_lock);
+        rcu_read_unlock();
 }
 
 /* When dgram socket disconnects (or changes its peer), we clear its receive
@@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
                         skpair->sk_err = ECONNRESET;
                         unix_state_unlock(skpair);
                         skpair->sk_state_change(skpair);
-                        read_lock(&skpair->sk_callback_lock);
                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
-                        read_unlock(&skpair->sk_callback_lock);
                 }
                 sock_put(skpair); /* It may now die */
                 unix_peer(sk) = NULL;
@@ -1142,7 +1143,7 @@ restart:
         newsk->sk_peercred.pid = task_tgid_vnr(current);
         current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
         newu = unix_sk(newsk);
-        newsk->sk_sleep = &newu->peer_wait;
+        newsk->sk_wq = &newu->peer_wq;
         otheru = unix_sk(other);
 
         /* copy address information from listening to new sock*/
@@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode)
                         other->sk_shutdown |= peer_mode;
                         unix_state_unlock(other);
                         other->sk_state_change(other);
-                        read_lock(&other->sk_callback_lock);
                         if (peer_mode == SHUTDOWN_MASK)
                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
                         else if (peer_mode & RCV_SHUTDOWN)
                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-                        read_unlock(&other->sk_callback_lock);
                 }
                 if (other)
                         sock_put(other);