path: root/include
author	Eric Dumazet <eric.dumazet@gmail.com>	2010-04-29 07:01:49 -0400
committer	David S. Miller <davem@davemloft.net>	2010-05-01 18:00:15 -0400
commit	43815482370c510c569fd18edb57afcb0fa8cab6 (patch)
tree	063efaae3758402b84f056438b704d1de68f7837 /include
parent	83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff)
net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we
need two atomic operations (and the associated cache-line dirtying) per
incoming packet. An RCU conversion is pretty much needed:

1) Add a new structure, "struct socket_wq", to hold all fields that will
   need rcu_read_lock() protection (currently: a wait_queue_head_t and a
   struct fasync_struct pointer).
   [A future patch will add a list anchor for wakeup coalescing]

2) Attach one such structure to each "struct socket" created in
   sock_alloc_inode().

3) Respect the RCU grace period when freeing a "struct socket_wq".

4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
   "struct socket_wq".

5) Change the sk_sleep() function to use the new sk->sk_wq instead of
   sk->sk_sleep.

6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside
   an rcu_read_lock() section.

7) Change all sk_has_sleeper() callers to:
   - use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
   - use wq_has_sleeper() to decide whether to wake up tasks
   - use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)

8) Modify sock_wake_async() to use RCU protection as well.

9) Exceptions: macvtap, drivers/net/tun.c and af_unix use an embedded
   "struct socket_wq" instead of a dynamically allocated one, so they
   don't need RCU freeing.

Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
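For illustration, the caller-side conversion described in steps 7 and 8 ends up
looking roughly like the sketch below. This is a minimal sketch only:
example_sock_readable() is a made-up name, and the real converted callers
(sock_def_readable() and friends in net/core/sock.c, sock_wake_async()) are
outside the include/-only diff shown on this page.

/* Hypothetical wake-up callback after the conversion (kernel context). */
#include <linux/rcupdate.h>
#include <net/sock.h>

static void example_sock_readable(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();			/* was: read_lock(&sk->sk_callback_lock) */
	wq = rcu_dereference(sk->sk_wq);	/* NULL once sock_orphan() has run */
	if (wq_has_sleeper(wq))			/* smp_mb() + waitqueue_active(&wq->wait) */
		wake_up_interruptible(&wq->wait);
	rcu_read_unlock();			/* was: read_unlock(&sk->sk_callback_lock) */
}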
Diffstat (limited to 'include')
-rw-r--r--	include/linux/net.h	14
-rw-r--r--	include/net/af_unix.h	20
-rw-r--r--	include/net/sock.h	38
3 files changed, 39 insertions, 33 deletions
diff --git a/include/linux/net.h b/include/linux/net.h
index 4157b5d42bd6..2b4deeeb8646 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -59,6 +59,7 @@ typedef enum {
 #include <linux/wait.h>
 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/kmemcheck.h>
+#include <linux/rcupdate.h>
 
 struct poll_table_struct;
 struct pipe_inode_info;
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd {
 	SHUT_RDWR	= 2,
 };
 
+struct socket_wq {
+	wait_queue_head_t	wait;
+	struct fasync_struct	*fasync_list;
+	struct rcu_head		rcu;
+} ____cacheline_aligned_in_smp;
+
 /**
  * struct socket - general BSD socket
  * @state: socket state (%SS_CONNECTED, etc)
@@ -135,11 +142,8 @@ struct socket {
 	kmemcheck_bitfield_end(type);
 
 	unsigned long		flags;
-	/*
-	 * Please keep fasync_list & wait fields in the same cache line
-	 */
-	struct fasync_struct	*fasync_list;
-	wait_queue_head_t	wait;
+
+	struct socket_wq	*wq;
 
 	struct file		*file;
 	struct sock		*sk;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 1614d78c60ed..20725e213aee 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -30,7 +30,7 @@ struct unix_skb_parms {
 #endif
 };
 
-#define UNIXCB(skb)	(*(struct unix_skb_parms*)&((skb)->cb))
+#define UNIXCB(skb)	(*(struct unix_skb_parms *)&((skb)->cb))
 #define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
@@ -45,21 +45,23 @@ struct unix_skb_parms {
 struct unix_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
 	struct unix_address	*addr;
 	struct dentry		*dentry;
 	struct vfsmount		*mnt;
 	struct mutex		readlock;
 	struct sock		*peer;
 	struct sock		*other;
 	struct list_head	link;
 	atomic_long_t		inflight;
 	spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
 	unsigned int		gc_maybe_cycle : 1;
-	wait_queue_head_t	peer_wait;
+	struct socket_wq	peer_wq;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
 
+#define peer_wait peer_wq.wait
+
 #ifdef CONFIG_SYSCTL
 extern int unix_sysctl_register(struct net *net);
 extern void unix_sysctl_unregister(struct net *net);
diff --git a/include/net/sock.h b/include/net/sock.h
index e1777db5b9ab..cc7f91ec972c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -159,7 +159,7 @@ struct sock_common {
  * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
  * @sk_lock: synchronizer
  * @sk_rcvbuf: size of receive buffer in bytes
- * @sk_sleep: sock wait queue
+ * @sk_wq: sock wait queue and async head
  * @sk_dst_cache: destination cache
  * @sk_dst_lock: destination cache lock
  * @sk_policy: flow policy
@@ -257,7 +257,7 @@ struct sock {
 		struct sk_buff	*tail;
 		int		len;
 	} sk_backlog;
-	wait_queue_head_t	*sk_sleep;
+	struct socket_wq	*sk_wq;
 	struct dst_entry	*sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 
 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
 {
-	return sk->sk_sleep;
+	return &sk->sk_wq->wait;
 }
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep = NULL;
+	sk->sk_wq  = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @wq: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                          ...
  *   tp->rcv_nxt check            sock_def_readable
  *   ...                          {
- *   schedule                       ...
- *                                  if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
- *                                    wake_up_interruptible(sk_sleep(sk))
+ *   schedule                       rcu_read_lock();
+ *                                  wq = rcu_dereference(sk->sk_wq);
+ *                                  if (wq && waitqueue_active(&wq->wait))
+ *                                    wake_up_interruptible(&wq->wait)
  *                                  ...
  *                                }
  *
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
  * could then endup calling schedule and sleep forever if there are no more
  * data on the socket.
  *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
  */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk_sleep(sk) && waitqueue_active(sk_sleep(sk));
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address:  socket wait queue
  * @p:             poll_table
  *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		 */
 		smp_mb();
 	}