aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/macvtap.c13
-rw-r--r--drivers/net/tun.c21
-rw-r--r--include/linux/net.h14
-rw-r--r--include/net/af_unix.h20
-rw-r--r--include/net/sock.h38
-rw-r--r--net/atm/common.c22
-rw-r--r--net/core/sock.c50
-rw-r--r--net/core/stream.c10
-rw-r--r--net/dccp/output.c10
-rw-r--r--net/iucv/af_iucv.c11
-rw-r--r--net/phonet/pep.c8
-rw-r--r--net/phonet/socket.c2
-rw-r--r--net/rxrpc/af_rxrpc.c10
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/socket.c47
-rw-r--r--net/unix/af_unix.c17
16 files changed, 181 insertions, 114 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index d97e1fd234ba..1c4110df343e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -37,6 +37,7 @@
37struct macvtap_queue { 37struct macvtap_queue {
38 struct sock sk; 38 struct sock sk;
39 struct socket sock; 39 struct socket sock;
40 struct socket_wq wq;
40 struct macvlan_dev *vlan; 41 struct macvlan_dev *vlan;
41 struct file *file; 42 struct file *file;
42 unsigned int flags; 43 unsigned int flags;
@@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
242 243
243static void macvtap_sock_write_space(struct sock *sk) 244static void macvtap_sock_write_space(struct sock *sk)
244{ 245{
246 wait_queue_head_t *wqueue;
247
245 if (!sock_writeable(sk) || 248 if (!sock_writeable(sk) ||
246 !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) 249 !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
247 return; 250 return;
248 251
249 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 252 wqueue = sk_sleep(sk);
250 wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); 253 if (wqueue && waitqueue_active(wqueue))
254 wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
251} 255}
252 256
253static int macvtap_open(struct inode *inode, struct file *file) 257static int macvtap_open(struct inode *inode, struct file *file)
@@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
272 if (!q) 276 if (!q)
273 goto out; 277 goto out;
274 278
275 init_waitqueue_head(&q->sock.wait); 279 q->sock.wq = &q->wq;
280 init_waitqueue_head(&q->wq.wait);
276 q->sock.type = SOCK_RAW; 281 q->sock.type = SOCK_RAW;
277 q->sock.state = SS_CONNECTED; 282 q->sock.state = SS_CONNECTED;
278 q->sock.file = file; 283 q->sock.file = file;
@@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
308 goto out; 313 goto out;
309 314
310 mask = 0; 315 mask = 0;
311 poll_wait(file, &q->sock.wait, wait); 316 poll_wait(file, &q->wq.wait, wait);
312 317
313 if (!skb_queue_empty(&q->sk.sk_receive_queue)) 318 if (!skb_queue_empty(&q->sk.sk_receive_queue))
314 mask |= POLLIN | POLLRDNORM; 319 mask |= POLLIN | POLLRDNORM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 20a17938c62b..e525a6cf5587 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,7 +109,7 @@ struct tun_struct {
109 109
110 struct tap_filter txflt; 110 struct tap_filter txflt;
111 struct socket socket; 111 struct socket socket;
112 112 struct socket_wq wq;
113#ifdef TUN_DEBUG 113#ifdef TUN_DEBUG
114 int debug; 114 int debug;
115#endif 115#endif
@@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev)
323 /* Inform the methods they need to stop using the dev. 323 /* Inform the methods they need to stop using the dev.
324 */ 324 */
325 if (tfile) { 325 if (tfile) {
326 wake_up_all(&tun->socket.wait); 326 wake_up_all(&tun->wq.wait);
327 if (atomic_dec_and_test(&tfile->count)) 327 if (atomic_dec_and_test(&tfile->count))
328 __tun_detach(tun); 328 __tun_detach(tun);
329 } 329 }
@@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
398 /* Notify and wake up reader process */ 398 /* Notify and wake up reader process */
399 if (tun->flags & TUN_FASYNC) 399 if (tun->flags & TUN_FASYNC)
400 kill_fasync(&tun->fasync, SIGIO, POLL_IN); 400 kill_fasync(&tun->fasync, SIGIO, POLL_IN);
401 wake_up_interruptible_poll(&tun->socket.wait, POLLIN | 401 wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
402 POLLRDNORM | POLLRDBAND); 402 POLLRDNORM | POLLRDBAND);
403 return NETDEV_TX_OK; 403 return NETDEV_TX_OK;
404 404
@@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
498 498
499 DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); 499 DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
500 500
501 poll_wait(file, &tun->socket.wait, wait); 501 poll_wait(file, &tun->wq.wait, wait);
502 502
503 if (!skb_queue_empty(&sk->sk_receive_queue)) 503 if (!skb_queue_empty(&sk->sk_receive_queue))
504 mask |= POLLIN | POLLRDNORM; 504 mask |= POLLIN | POLLRDNORM;
@@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
773 773
774 DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); 774 DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
775 775
776 add_wait_queue(&tun->socket.wait, &wait); 776 add_wait_queue(&tun->wq.wait, &wait);
777 while (len) { 777 while (len) {
778 current->state = TASK_INTERRUPTIBLE; 778 current->state = TASK_INTERRUPTIBLE;
779 779
@@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
804 } 804 }
805 805
806 current->state = TASK_RUNNING; 806 current->state = TASK_RUNNING;
807 remove_wait_queue(&tun->socket.wait, &wait); 807 remove_wait_queue(&tun->wq.wait, &wait);
808 808
809 return ret; 809 return ret;
810} 810}
@@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
861static void tun_sock_write_space(struct sock *sk) 861static void tun_sock_write_space(struct sock *sk)
862{ 862{
863 struct tun_struct *tun; 863 struct tun_struct *tun;
864 wait_queue_head_t *wqueue;
864 865
865 if (!sock_writeable(sk)) 866 if (!sock_writeable(sk))
866 return; 867 return;
@@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk)
868 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) 869 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
869 return; 870 return;
870 871
871 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 872 wqueue = sk_sleep(sk);
872 wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | 873 if (wqueue && waitqueue_active(wqueue))
874 wake_up_interruptible_sync_poll(wqueue, POLLOUT |
873 POLLWRNORM | POLLWRBAND); 875 POLLWRNORM | POLLWRBAND);
874 876
875 tun = tun_sk(sk)->tun; 877 tun = tun_sk(sk)->tun;
@@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1039 if (!sk) 1041 if (!sk)
1040 goto err_free_dev; 1042 goto err_free_dev;
1041 1043
1042 init_waitqueue_head(&tun->socket.wait); 1044 tun->socket.wq = &tun->wq;
1045 init_waitqueue_head(&tun->wq.wait);
1043 tun->socket.ops = &tun_socket_ops; 1046 tun->socket.ops = &tun_socket_ops;
1044 sock_init_data(&tun->socket, sk); 1047 sock_init_data(&tun->socket, sk);
1045 sk->sk_write_space = tun_sock_write_space; 1048 sk->sk_write_space = tun_sock_write_space;
diff --git a/include/linux/net.h b/include/linux/net.h
index 4157b5d42bd6..2b4deeeb8646 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -59,6 +59,7 @@ typedef enum {
59#include <linux/wait.h> 59#include <linux/wait.h>
60#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */ 60#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
61#include <linux/kmemcheck.h> 61#include <linux/kmemcheck.h>
62#include <linux/rcupdate.h>
62 63
63struct poll_table_struct; 64struct poll_table_struct;
64struct pipe_inode_info; 65struct pipe_inode_info;
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd {
116 SHUT_RDWR = 2, 117 SHUT_RDWR = 2,
117}; 118};
118 119
120struct socket_wq {
121 wait_queue_head_t wait;
122 struct fasync_struct *fasync_list;
123 struct rcu_head rcu;
124} ____cacheline_aligned_in_smp;
125
119/** 126/**
120 * struct socket - general BSD socket 127 * struct socket - general BSD socket
121 * @state: socket state (%SS_CONNECTED, etc) 128 * @state: socket state (%SS_CONNECTED, etc)
@@ -135,11 +142,8 @@ struct socket {
135 kmemcheck_bitfield_end(type); 142 kmemcheck_bitfield_end(type);
136 143
137 unsigned long flags; 144 unsigned long flags;
138 /* 145
139 * Please keep fasync_list & wait fields in the same cache line 146 struct socket_wq *wq;
140 */
141 struct fasync_struct *fasync_list;
142 wait_queue_head_t wait;
143 147
144 struct file *file; 148 struct file *file;
145 struct sock *sk; 149 struct sock *sk;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 1614d78c60ed..20725e213aee 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -30,7 +30,7 @@ struct unix_skb_parms {
30#endif 30#endif
31}; 31};
32 32
33#define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) 33#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
34#define UNIXCREDS(skb) (&UNIXCB((skb)).creds) 34#define UNIXCREDS(skb) (&UNIXCB((skb)).creds)
35#define UNIXSID(skb) (&UNIXCB((skb)).secid) 35#define UNIXSID(skb) (&UNIXCB((skb)).secid)
36 36
@@ -45,21 +45,23 @@ struct unix_skb_parms {
45struct unix_sock { 45struct unix_sock {
46 /* WARNING: sk has to be the first member */ 46 /* WARNING: sk has to be the first member */
47 struct sock sk; 47 struct sock sk;
48 struct unix_address *addr; 48 struct unix_address *addr;
49 struct dentry *dentry; 49 struct dentry *dentry;
50 struct vfsmount *mnt; 50 struct vfsmount *mnt;
51 struct mutex readlock; 51 struct mutex readlock;
52 struct sock *peer; 52 struct sock *peer;
53 struct sock *other; 53 struct sock *other;
54 struct list_head link; 54 struct list_head link;
55 atomic_long_t inflight; 55 atomic_long_t inflight;
56 spinlock_t lock; 56 spinlock_t lock;
57 unsigned int gc_candidate : 1; 57 unsigned int gc_candidate : 1;
58 unsigned int gc_maybe_cycle : 1; 58 unsigned int gc_maybe_cycle : 1;
59 wait_queue_head_t peer_wait; 59 struct socket_wq peer_wq;
60}; 60};
61#define unix_sk(__sk) ((struct unix_sock *)__sk) 61#define unix_sk(__sk) ((struct unix_sock *)__sk)
62 62
63#define peer_wait peer_wq.wait
64
63#ifdef CONFIG_SYSCTL 65#ifdef CONFIG_SYSCTL
64extern int unix_sysctl_register(struct net *net); 66extern int unix_sysctl_register(struct net *net);
65extern void unix_sysctl_unregister(struct net *net); 67extern void unix_sysctl_unregister(struct net *net);
diff --git a/include/net/sock.h b/include/net/sock.h
index e1777db5b9ab..cc7f91ec972c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -159,7 +159,7 @@ struct sock_common {
159 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 159 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
160 * @sk_lock: synchronizer 160 * @sk_lock: synchronizer
161 * @sk_rcvbuf: size of receive buffer in bytes 161 * @sk_rcvbuf: size of receive buffer in bytes
162 * @sk_sleep: sock wait queue 162 * @sk_wq: sock wait queue and async head
163 * @sk_dst_cache: destination cache 163 * @sk_dst_cache: destination cache
164 * @sk_dst_lock: destination cache lock 164 * @sk_dst_lock: destination cache lock
165 * @sk_policy: flow policy 165 * @sk_policy: flow policy
@@ -257,7 +257,7 @@ struct sock {
257 struct sk_buff *tail; 257 struct sk_buff *tail;
258 int len; 258 int len;
259 } sk_backlog; 259 } sk_backlog;
260 wait_queue_head_t *sk_sleep; 260 struct socket_wq *sk_wq;
261 struct dst_entry *sk_dst_cache; 261 struct dst_entry *sk_dst_cache;
262#ifdef CONFIG_XFRM 262#ifdef CONFIG_XFRM
263 struct xfrm_policy *sk_policy[2]; 263 struct xfrm_policy *sk_policy[2];
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
1219 1219
1220static inline wait_queue_head_t *sk_sleep(struct sock *sk) 1220static inline wait_queue_head_t *sk_sleep(struct sock *sk)
1221{ 1221{
1222 return sk->sk_sleep; 1222 return &sk->sk_wq->wait;
1223} 1223}
1224/* Detach socket from process context. 1224/* Detach socket from process context.
1225 * Announce socket dead, detach it from wait queue and inode. 1225 * Announce socket dead, detach it from wait queue and inode.
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
1233 write_lock_bh(&sk->sk_callback_lock); 1233 write_lock_bh(&sk->sk_callback_lock);
1234 sock_set_flag(sk, SOCK_DEAD); 1234 sock_set_flag(sk, SOCK_DEAD);
1235 sk_set_socket(sk, NULL); 1235 sk_set_socket(sk, NULL);
1236 sk->sk_sleep = NULL; 1236 sk->sk_wq = NULL;
1237 write_unlock_bh(&sk->sk_callback_lock); 1237 write_unlock_bh(&sk->sk_callback_lock);
1238} 1238}
1239 1239
1240static inline void sock_graft(struct sock *sk, struct socket *parent) 1240static inline void sock_graft(struct sock *sk, struct socket *parent)
1241{ 1241{
1242 write_lock_bh(&sk->sk_callback_lock); 1242 write_lock_bh(&sk->sk_callback_lock);
1243 sk->sk_sleep = &parent->wait; 1243 rcu_assign_pointer(sk->sk_wq, parent->wq);
1244 parent->sk = sk; 1244 parent->sk = sk;
1245 sk_set_socket(sk, parent); 1245 sk_set_socket(sk, parent);
1246 security_sock_graft(sk, parent); 1246 security_sock_graft(sk, parent);
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
1392} 1392}
1393 1393
1394/** 1394/**
1395 * sk_has_sleeper - check if there are any waiting processes 1395 * wq_has_sleeper - check if there are any waiting processes
1396 * @sk: socket 1396 * @sk: struct socket_wq
1397 * 1397 *
1398 * Returns true if socket has waiting processes 1398 * Returns true if socket_wq has waiting processes
1399 * 1399 *
1400 * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory 1400 * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
1401 * barrier call. They were added due to the race found within the tcp code. 1401 * barrier call. They were added due to the race found within the tcp code.
1402 * 1402 *
1403 * Consider following tcp code paths: 1403 * Consider following tcp code paths:
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
1410 * ... ... 1410 * ... ...
1411 * tp->rcv_nxt check sock_def_readable 1411 * tp->rcv_nxt check sock_def_readable
1412 * ... { 1412 * ... {
1413 * schedule ... 1413 * schedule rcu_read_lock();
1414 * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 1414 * wq = rcu_dereference(sk->sk_wq);
1415 * wake_up_interruptible(sk_sleep(sk)) 1415 * if (wq && waitqueue_active(&wq->wait))
1416 * wake_up_interruptible(&wq->wait)
1416 * ... 1417 * ...
1417 * } 1418 * }
1418 * 1419 *
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
1421 * could then endup calling schedule and sleep forever if there are no more 1422 * could then endup calling schedule and sleep forever if there are no more
1422 * data on the socket. 1423 * data on the socket.
1423 * 1424 *
1424 * The sk_has_sleeper is always called right after a call to read_lock, so we
1425 * can use smp_mb__after_lock barrier.
1426 */ 1425 */
1427static inline int sk_has_sleeper(struct sock *sk) 1426static inline bool wq_has_sleeper(struct socket_wq *wq)
1428{ 1427{
1428
1429 /* 1429 /*
1430 * We need to be sure we are in sync with the 1430 * We need to be sure we are in sync with the
1431 * add_wait_queue modifications to the wait queue. 1431 * add_wait_queue modifications to the wait queue.
1432 * 1432 *
1433 * This memory barrier is paired in the sock_poll_wait. 1433 * This memory barrier is paired in the sock_poll_wait.
1434 */ 1434 */
1435 smp_mb__after_lock(); 1435 smp_mb();
1436 return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); 1436 return wq && waitqueue_active(&wq->wait);
1437} 1437}
1438 1438
1439/** 1439/**
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
1442 * @wait_address: socket wait queue 1442 * @wait_address: socket wait queue
1443 * @p: poll_table 1443 * @p: poll_table
1444 * 1444 *
1445 * See the comments in the sk_has_sleeper function. 1445 * See the comments in the wq_has_sleeper function.
1446 */ 1446 */
1447static inline void sock_poll_wait(struct file *filp, 1447static inline void sock_poll_wait(struct file *filp,
1448 wait_queue_head_t *wait_address, poll_table *p) 1448 wait_queue_head_t *wait_address, poll_table *p)
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
1453 * We need to be sure we are in sync with the 1453 * We need to be sure we are in sync with the
1454 * socket flags modification. 1454 * socket flags modification.
1455 * 1455 *
1456 * This memory barrier is paired in the sk_has_sleeper. 1456 * This memory barrier is paired in the wq_has_sleeper.
1457 */ 1457 */
1458 smp_mb(); 1458 smp_mb();
1459 } 1459 }
diff --git a/net/atm/common.c b/net/atm/common.c
index e3e10e6f8628..b43feb1a3995 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk)
90 90
91static void vcc_def_wakeup(struct sock *sk) 91static void vcc_def_wakeup(struct sock *sk)
92{ 92{
93 read_lock(&sk->sk_callback_lock); 93 struct socket_wq *wq;
94 if (sk_has_sleeper(sk)) 94
95 wake_up(sk_sleep(sk)); 95 rcu_read_lock();
96 read_unlock(&sk->sk_callback_lock); 96 wq = rcu_dereference(sk->sk_wq);
97 if (wq_has_sleeper(wq))
98 wake_up(&wq->wait);
99 rcu_read_unlock();
97} 100}
98 101
99static inline int vcc_writable(struct sock *sk) 102static inline int vcc_writable(struct sock *sk)
@@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk)
106 109
107static void vcc_write_space(struct sock *sk) 110static void vcc_write_space(struct sock *sk)
108{ 111{
109 read_lock(&sk->sk_callback_lock); 112 struct socket_wq *wq;
113
114 rcu_read_lock();
110 115
111 if (vcc_writable(sk)) { 116 if (vcc_writable(sk)) {
112 if (sk_has_sleeper(sk)) 117 wq = rcu_dereference(sk->sk_wq);
113 wake_up_interruptible(sk_sleep(sk)); 118 if (wq_has_sleeper(wq))
119 wake_up_interruptible(&wq->wait);
114 120
115 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 121 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
116 } 122 }
117 123
118 read_unlock(&sk->sk_callback_lock); 124 rcu_read_unlock();
119} 125}
120 126
121static struct proto vcc_proto = { 127static struct proto vcc_proto = {
diff --git a/net/core/sock.c b/net/core/sock.c
index 51041759517e..94c4affdda9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1211 */ 1211 */
1212 sk_refcnt_debug_inc(newsk); 1212 sk_refcnt_debug_inc(newsk);
1213 sk_set_socket(newsk, NULL); 1213 sk_set_socket(newsk, NULL);
1214 newsk->sk_sleep = NULL; 1214 newsk->sk_wq = NULL;
1215 1215
1216 if (newsk->sk_prot->sockets_allocated) 1216 if (newsk->sk_prot->sockets_allocated)
1217 percpu_counter_inc(newsk->sk_prot->sockets_allocated); 1217 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
1800 1800
1801static void sock_def_wakeup(struct sock *sk) 1801static void sock_def_wakeup(struct sock *sk)
1802{ 1802{
1803 read_lock(&sk->sk_callback_lock); 1803 struct socket_wq *wq;
1804 if (sk_has_sleeper(sk)) 1804
1805 wake_up_interruptible_all(sk_sleep(sk)); 1805 rcu_read_lock();
1806 read_unlock(&sk->sk_callback_lock); 1806 wq = rcu_dereference(sk->sk_wq);
1807 if (wq_has_sleeper(wq))
1808 wake_up_interruptible_all(&wq->wait);
1809 rcu_read_unlock();
1807} 1810}
1808 1811
1809static void sock_def_error_report(struct sock *sk) 1812static void sock_def_error_report(struct sock *sk)
1810{ 1813{
1811 read_lock(&sk->sk_callback_lock); 1814 struct socket_wq *wq;
1812 if (sk_has_sleeper(sk)) 1815
1813 wake_up_interruptible_poll(sk_sleep(sk), POLLERR); 1816 rcu_read_lock();
1817 wq = rcu_dereference(sk->sk_wq);
1818 if (wq_has_sleeper(wq))
1819 wake_up_interruptible_poll(&wq->wait, POLLERR);
1814 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1820 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1815 read_unlock(&sk->sk_callback_lock); 1821 rcu_read_unlock();
1816} 1822}
1817 1823
1818static void sock_def_readable(struct sock *sk, int len) 1824static void sock_def_readable(struct sock *sk, int len)
1819{ 1825{
1820 read_lock(&sk->sk_callback_lock); 1826 struct socket_wq *wq;
1821 if (sk_has_sleeper(sk)) 1827
1822 wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | 1828 rcu_read_lock();
1829 wq = rcu_dereference(sk->sk_wq);
1830 if (wq_has_sleeper(wq))
1831 wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
1823 POLLRDNORM | POLLRDBAND); 1832 POLLRDNORM | POLLRDBAND);
1824 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 1833 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1825 read_unlock(&sk->sk_callback_lock); 1834 rcu_read_unlock();
1826} 1835}
1827 1836
1828static void sock_def_write_space(struct sock *sk) 1837static void sock_def_write_space(struct sock *sk)
1829{ 1838{
1830 read_lock(&sk->sk_callback_lock); 1839 struct socket_wq *wq;
1840
1841 rcu_read_lock();
1831 1842
1832 /* Do not wake up a writer until he can make "significant" 1843 /* Do not wake up a writer until he can make "significant"
1833 * progress. --DaveM 1844 * progress. --DaveM
1834 */ 1845 */
1835 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1846 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1836 if (sk_has_sleeper(sk)) 1847 wq = rcu_dereference(sk->sk_wq);
1837 wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | 1848 if (wq_has_sleeper(wq))
1849 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
1838 POLLWRNORM | POLLWRBAND); 1850 POLLWRNORM | POLLWRBAND);
1839 1851
1840 /* Should agree with poll, otherwise some programs break */ 1852 /* Should agree with poll, otherwise some programs break */
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
1842 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 1854 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1843 } 1855 }
1844 1856
1845 read_unlock(&sk->sk_callback_lock); 1857 rcu_read_unlock();
1846} 1858}
1847 1859
1848static void sock_def_destruct(struct sock *sk) 1860static void sock_def_destruct(struct sock *sk)
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1896 1908
1897 if (sock) { 1909 if (sock) {
1898 sk->sk_type = sock->type; 1910 sk->sk_type = sock->type;
1899 sk->sk_sleep = &sock->wait; 1911 sk->sk_wq = sock->wq;
1900 sock->sk = sk; 1912 sock->sk = sk;
1901 } else 1913 } else
1902 sk->sk_sleep = NULL; 1914 sk->sk_wq = NULL;
1903 1915
1904 spin_lock_init(&sk->sk_dst_lock); 1916 spin_lock_init(&sk->sk_dst_lock);
1905 rwlock_init(&sk->sk_callback_lock); 1917 rwlock_init(&sk->sk_callback_lock);
diff --git a/net/core/stream.c b/net/core/stream.c
index 7b3c3f30b107..cc196f42b8d8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,15 +28,19 @@
28void sk_stream_write_space(struct sock *sk) 28void sk_stream_write_space(struct sock *sk)
29{ 29{
30 struct socket *sock = sk->sk_socket; 30 struct socket *sock = sk->sk_socket;
31 struct socket_wq *wq;
31 32
32 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { 33 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
33 clear_bit(SOCK_NOSPACE, &sock->flags); 34 clear_bit(SOCK_NOSPACE, &sock->flags);
34 35
35 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 36 rcu_read_lock();
36 wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | 37 wq = rcu_dereference(sk->sk_wq);
38 if (wq_has_sleeper(wq))
39 wake_up_interruptible_poll(&wq->wait, POLLOUT |
37 POLLWRNORM | POLLWRBAND); 40 POLLWRNORM | POLLWRBAND);
38 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 41 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
39 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); 42 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
43 rcu_read_unlock();
40 } 44 }
41} 45}
42 46
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2d3dcb39851f..aadbdb58758b 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss);
195 195
196void dccp_write_space(struct sock *sk) 196void dccp_write_space(struct sock *sk)
197{ 197{
198 read_lock(&sk->sk_callback_lock); 198 struct socket_wq *wq;
199 199
200 if (sk_has_sleeper(sk)) 200 rcu_read_lock();
201 wake_up_interruptible(sk_sleep(sk)); 201 wq = rcu_dereference(sk->sk_wq);
202 if (wq_has_sleeper(wq))
203 wake_up_interruptible(&wq->wait);
202 /* Should agree with poll, otherwise some programs break */ 204 /* Should agree with poll, otherwise some programs break */
203 if (sock_writeable(sk)) 205 if (sock_writeable(sk))
204 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 206 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
205 207
206 read_unlock(&sk->sk_callback_lock); 208 rcu_read_unlock();
207} 209}
208 210
209/** 211/**
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9636b7d27b48..8be324fe08b9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -305,11 +305,14 @@ static inline int iucv_below_msglim(struct sock *sk)
305 */ 305 */
306static void iucv_sock_wake_msglim(struct sock *sk) 306static void iucv_sock_wake_msglim(struct sock *sk)
307{ 307{
308 read_lock(&sk->sk_callback_lock); 308 struct socket_wq *wq;
309 if (sk_has_sleeper(sk)) 309
310 wake_up_interruptible_all(sk_sleep(sk)); 310 rcu_read_lock();
311 wq = rcu_dereference(sk->sk_wq);
312 if (wq_has_sleeper(wq))
313 wake_up_interruptible_all(&wq->wait);
311 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 314 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
312 read_unlock(&sk->sk_callback_lock); 315 rcu_read_unlock();
313} 316}
314 317
315/* Timers */ 318/* Timers */
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e2a95762abd3..af4d38bc3b22 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock)
664 if (signal_pending(tsk)) 664 if (signal_pending(tsk))
665 return sock_intr_errno(timeo); 665 return sock_intr_errno(timeo);
666 666
667 prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, 667 prepare_to_wait_exclusive(sk_sleep(sk), &wait,
668 TASK_INTERRUPTIBLE); 668 TASK_INTERRUPTIBLE);
669 release_sock(sk); 669 release_sock(sk);
670 timeo = schedule_timeout(timeo); 670 timeo = schedule_timeout(timeo);
671 lock_sock(sk); 671 lock_sock(sk);
672 finish_wait(&sk->sk_socket->wait, &wait); 672 finish_wait(sk_sleep(sk), &wait);
673 } 673 }
674 674
675 return 0; 675 return 0;
@@ -910,10 +910,10 @@ disabled:
910 goto out; 910 goto out;
911 } 911 }
912 912
913 prepare_to_wait(&sk->sk_socket->wait, &wait, 913 prepare_to_wait(sk_sleep(sk), &wait,
914 TASK_INTERRUPTIBLE); 914 TASK_INTERRUPTIBLE);
915 done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); 915 done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
916 finish_wait(&sk->sk_socket->wait, &wait); 916 finish_wait(sk_sleep(sk), &wait);
917 917
918 if (sk->sk_state != TCP_ESTABLISHED) 918 if (sk->sk_state != TCP_ESTABLISHED)
919 goto disabled; 919 goto disabled;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c785bfd0744f..6e9848bf0370 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
265 struct pep_sock *pn = pep_sk(sk); 265 struct pep_sock *pn = pep_sk(sk);
266 unsigned int mask = 0; 266 unsigned int mask = 0;
267 267
268 poll_wait(file, &sock->wait, wait); 268 poll_wait(file, sk_sleep(sk), wait);
269 269
270 switch (sk->sk_state) { 270 switch (sk->sk_state) {
271 case TCP_LISTEN: 271 case TCP_LISTEN:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c432d76f415e..0b9bb2085ce4 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk)
62static void rxrpc_write_space(struct sock *sk) 62static void rxrpc_write_space(struct sock *sk)
63{ 63{
64 _enter("%p", sk); 64 _enter("%p", sk);
65 read_lock(&sk->sk_callback_lock); 65 rcu_read_lock();
66 if (rxrpc_writable(sk)) { 66 if (rxrpc_writable(sk)) {
67 if (sk_has_sleeper(sk)) 67 struct socket_wq *wq = rcu_dereference(sk->sk_wq);
68 wake_up_interruptible(sk_sleep(sk)); 68
69 if (wq_has_sleeper(wq))
70 wake_up_interruptible(&wq->wait);
69 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 71 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
70 } 72 }
71 read_unlock(&sk->sk_callback_lock); 73 rcu_read_unlock();
72} 74}
73 75
74/* 76/*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13d8229f3a9c..d54700af927a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6065,7 +6065,7 @@ static void __sctp_write_space(struct sctp_association *asoc)
6065 * here by modeling from the current TCP/UDP code. 6065 * here by modeling from the current TCP/UDP code.
6066 * We have not tested with it yet. 6066 * We have not tested with it yet.
6067 */ 6067 */
6068 if (sock->fasync_list && 6068 if (sock->wq->fasync_list &&
6069 !(sk->sk_shutdown & SEND_SHUTDOWN)) 6069 !(sk->sk_shutdown & SEND_SHUTDOWN))
6070 sock_wake_async(sock, 6070 sock_wake_async(sock,
6071 SOCK_WAKE_SPACE, POLL_OUT); 6071 SOCK_WAKE_SPACE, POLL_OUT);
diff --git a/net/socket.c b/net/socket.c
index cb7c1f6c0d6e..dae8c6b84a09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
253 if (!ei) 253 if (!ei)
254 return NULL; 254 return NULL;
255 init_waitqueue_head(&ei->socket.wait); 255 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
256 if (!ei->socket.wq) {
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
260 init_waitqueue_head(&ei->socket.wq->wait);
261 ei->socket.wq->fasync_list = NULL;
256 262
257 ei->socket.fasync_list = NULL;
258 ei->socket.state = SS_UNCONNECTED; 263 ei->socket.state = SS_UNCONNECTED;
259 ei->socket.flags = 0; 264 ei->socket.flags = 0;
260 ei->socket.ops = NULL; 265 ei->socket.ops = NULL;
@@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
264 return &ei->vfs_inode; 269 return &ei->vfs_inode;
265} 270}
266 271
272
273static void wq_free_rcu(struct rcu_head *head)
274{
275 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
276
277 kfree(wq);
278}
279
267static void sock_destroy_inode(struct inode *inode) 280static void sock_destroy_inode(struct inode *inode)
268{ 281{
269 kmem_cache_free(sock_inode_cachep, 282 struct socket_alloc *ei;
270 container_of(inode, struct socket_alloc, vfs_inode)); 283
284 ei = container_of(inode, struct socket_alloc, vfs_inode);
285 call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
286 kmem_cache_free(sock_inode_cachep, ei);
271} 287}
272 288
273static void init_once(void *foo) 289static void init_once(void *foo)
@@ -513,7 +529,7 @@ void sock_release(struct socket *sock)
513 module_put(owner); 529 module_put(owner);
514 } 530 }
515 531
516 if (sock->fasync_list) 532 if (sock->wq->fasync_list)
517 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 533 printk(KERN_ERR "sock_release: fasync list not empty!\n");
518 534
519 percpu_sub(sockets_in_use, 1); 535 percpu_sub(sockets_in_use, 1);
@@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on)
1080 1096
1081 lock_sock(sk); 1097 lock_sock(sk);
1082 1098
1083 fasync_helper(fd, filp, on, &sock->fasync_list); 1099 fasync_helper(fd, filp, on, &sock->wq->fasync_list);
1084 1100
1085 if (!sock->fasync_list) 1101 if (!sock->wq->fasync_list)
1086 sock_reset_flag(sk, SOCK_FASYNC); 1102 sock_reset_flag(sk, SOCK_FASYNC);
1087 else 1103 else
1088 sock_set_flag(sk, SOCK_FASYNC); 1104 sock_set_flag(sk, SOCK_FASYNC);
@@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
1091 return 0; 1107 return 0;
1092} 1108}
1093 1109
1094/* This function may be called only under socket lock or callback_lock */ 1110/* This function may be called only under socket lock or callback_lock or rcu_lock */
1095 1111
1096int sock_wake_async(struct socket *sock, int how, int band) 1112int sock_wake_async(struct socket *sock, int how, int band)
1097{ 1113{
1098 if (!sock || !sock->fasync_list) 1114 struct socket_wq *wq;
1115
1116 if (!sock)
1099 return -1; 1117 return -1;
1118 rcu_read_lock();
1119 wq = rcu_dereference(sock->wq);
1120 if (!wq || !wq->fasync_list) {
1121 rcu_read_unlock();
1122 return -1;
1123 }
1100 switch (how) { 1124 switch (how) {
1101 case SOCK_WAKE_WAITD: 1125 case SOCK_WAKE_WAITD:
1102 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1126 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band)
1108 /* fall through */ 1132 /* fall through */
1109 case SOCK_WAKE_IO: 1133 case SOCK_WAKE_IO:
1110call_kill: 1134call_kill:
1111 kill_fasync(&sock->fasync_list, SIGIO, band); 1135 kill_fasync(&wq->fasync_list, SIGIO, band);
1112 break; 1136 break;
1113 case SOCK_WAKE_URG: 1137 case SOCK_WAKE_URG:
1114 kill_fasync(&sock->fasync_list, SIGURG, band); 1138 kill_fasync(&wq->fasync_list, SIGURG, band);
1115 } 1139 }
1140 rcu_read_unlock();
1116 return 0; 1141 return 0;
1117} 1142}
1118 1143
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 87c0360eaa25..fef2cc5e9d2b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk)
313 313
314static void unix_write_space(struct sock *sk) 314static void unix_write_space(struct sock *sk)
315{ 315{
316 read_lock(&sk->sk_callback_lock); 316 struct socket_wq *wq;
317
318 rcu_read_lock();
317 if (unix_writable(sk)) { 319 if (unix_writable(sk)) {
318 if (sk_has_sleeper(sk)) 320 wq = rcu_dereference(sk->sk_wq);
319 wake_up_interruptible_sync(sk_sleep(sk)); 321 if (wq_has_sleeper(wq))
322 wake_up_interruptible_sync(&wq->wait);
320 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 323 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321 } 324 }
322 read_unlock(&sk->sk_callback_lock); 325 rcu_read_unlock();
323} 326}
324 327
325/* When dgram socket disconnects (or changes its peer), we clear its receive 328/* When dgram socket disconnects (or changes its peer), we clear its receive
@@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
406 skpair->sk_err = ECONNRESET; 409 skpair->sk_err = ECONNRESET;
407 unix_state_unlock(skpair); 410 unix_state_unlock(skpair);
408 skpair->sk_state_change(skpair); 411 skpair->sk_state_change(skpair);
409 read_lock(&skpair->sk_callback_lock);
410 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 412 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411 read_unlock(&skpair->sk_callback_lock);
412 } 413 }
413 sock_put(skpair); /* It may now die */ 414 sock_put(skpair); /* It may now die */
414 unix_peer(sk) = NULL; 415 unix_peer(sk) = NULL;
@@ -1142,7 +1143,7 @@ restart:
1142 newsk->sk_peercred.pid = task_tgid_vnr(current); 1143 newsk->sk_peercred.pid = task_tgid_vnr(current);
1143 current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); 1144 current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
1144 newu = unix_sk(newsk); 1145 newu = unix_sk(newsk);
1145 newsk->sk_sleep = &newu->peer_wait; 1146 newsk->sk_wq = &newu->peer_wq;
1146 otheru = unix_sk(other); 1147 otheru = unix_sk(other);
1147 1148
1148 /* copy address information from listening to new sock*/ 1149 /* copy address information from listening to new sock*/
@@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode)
1931 other->sk_shutdown |= peer_mode; 1932 other->sk_shutdown |= peer_mode;
1932 unix_state_unlock(other); 1933 unix_state_unlock(other);
1933 other->sk_state_change(other); 1934 other->sk_state_change(other);
1934 read_lock(&other->sk_callback_lock);
1935 if (peer_mode == SHUTDOWN_MASK) 1935 if (peer_mode == SHUTDOWN_MASK)
1936 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 1936 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1937 else if (peer_mode & RCV_SHUTDOWN) 1937 else if (peer_mode & RCV_SHUTDOWN)
1938 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 1938 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1939 read_unlock(&other->sk_callback_lock);
1940 } 1939 }
1941 if (other) 1940 if (other)
1942 sock_put(other); 1941 sock_put(other);