diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /include | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) |
net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock currently protects the sk->sk_sleep pointer, so we
need two atomic operations (and the associated cache line dirtying) per incoming
packet.
An RCU conversion is pretty much needed:
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
"struct socket_wq".
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to:
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to wake up tasks only when there are sleepers.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions:
macvtap, drivers/net/tun.c and af_unix use an embedded "struct socket_wq"
instead of a dynamically allocated one. They don't need RCU freeing.
Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/net.h | 14 | ||||
-rw-r--r-- | include/net/af_unix.h | 20 | ||||
-rw-r--r-- | include/net/sock.h | 38 |
3 files changed, 39 insertions, 33 deletions
diff --git a/include/linux/net.h b/include/linux/net.h index 4157b5d42bd6..2b4deeeb8646 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -59,6 +59,7 @@ typedef enum { | |||
59 | #include <linux/wait.h> | 59 | #include <linux/wait.h> |
60 | #include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */ | 60 | #include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */ |
61 | #include <linux/kmemcheck.h> | 61 | #include <linux/kmemcheck.h> |
62 | #include <linux/rcupdate.h> | ||
62 | 63 | ||
63 | struct poll_table_struct; | 64 | struct poll_table_struct; |
64 | struct pipe_inode_info; | 65 | struct pipe_inode_info; |
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd { | |||
116 | SHUT_RDWR = 2, | 117 | SHUT_RDWR = 2, |
117 | }; | 118 | }; |
118 | 119 | ||
120 | struct socket_wq { | ||
121 | wait_queue_head_t wait; | ||
122 | struct fasync_struct *fasync_list; | ||
123 | struct rcu_head rcu; | ||
124 | } ____cacheline_aligned_in_smp; | ||
125 | |||
119 | /** | 126 | /** |
120 | * struct socket - general BSD socket | 127 | * struct socket - general BSD socket |
121 | * @state: socket state (%SS_CONNECTED, etc) | 128 | * @state: socket state (%SS_CONNECTED, etc) |
@@ -135,11 +142,8 @@ struct socket { | |||
135 | kmemcheck_bitfield_end(type); | 142 | kmemcheck_bitfield_end(type); |
136 | 143 | ||
137 | unsigned long flags; | 144 | unsigned long flags; |
138 | /* | 145 | |
139 | * Please keep fasync_list & wait fields in the same cache line | 146 | struct socket_wq *wq; |
140 | */ | ||
141 | struct fasync_struct *fasync_list; | ||
142 | wait_queue_head_t wait; | ||
143 | 147 | ||
144 | struct file *file; | 148 | struct file *file; |
145 | struct sock *sk; | 149 | struct sock *sk; |
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1614d78c60ed..20725e213aee 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h | |||
@@ -30,7 +30,7 @@ struct unix_skb_parms { | |||
30 | #endif | 30 | #endif |
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) | 33 | #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) |
34 | #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) | 34 | #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) |
35 | #define UNIXSID(skb) (&UNIXCB((skb)).secid) | 35 | #define UNIXSID(skb) (&UNIXCB((skb)).secid) |
36 | 36 | ||
@@ -45,21 +45,23 @@ struct unix_skb_parms { | |||
45 | struct unix_sock { | 45 | struct unix_sock { |
46 | /* WARNING: sk has to be the first member */ | 46 | /* WARNING: sk has to be the first member */ |
47 | struct sock sk; | 47 | struct sock sk; |
48 | struct unix_address *addr; | 48 | struct unix_address *addr; |
49 | struct dentry *dentry; | 49 | struct dentry *dentry; |
50 | struct vfsmount *mnt; | 50 | struct vfsmount *mnt; |
51 | struct mutex readlock; | 51 | struct mutex readlock; |
52 | struct sock *peer; | 52 | struct sock *peer; |
53 | struct sock *other; | 53 | struct sock *other; |
54 | struct list_head link; | 54 | struct list_head link; |
55 | atomic_long_t inflight; | 55 | atomic_long_t inflight; |
56 | spinlock_t lock; | 56 | spinlock_t lock; |
57 | unsigned int gc_candidate : 1; | 57 | unsigned int gc_candidate : 1; |
58 | unsigned int gc_maybe_cycle : 1; | 58 | unsigned int gc_maybe_cycle : 1; |
59 | wait_queue_head_t peer_wait; | 59 | struct socket_wq peer_wq; |
60 | }; | 60 | }; |
61 | #define unix_sk(__sk) ((struct unix_sock *)__sk) | 61 | #define unix_sk(__sk) ((struct unix_sock *)__sk) |
62 | 62 | ||
63 | #define peer_wait peer_wq.wait | ||
64 | |||
63 | #ifdef CONFIG_SYSCTL | 65 | #ifdef CONFIG_SYSCTL |
64 | extern int unix_sysctl_register(struct net *net); | 66 | extern int unix_sysctl_register(struct net *net); |
65 | extern void unix_sysctl_unregister(struct net *net); | 67 | extern void unix_sysctl_unregister(struct net *net); |
diff --git a/include/net/sock.h b/include/net/sock.h index e1777db5b9ab..cc7f91ec972c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -159,7 +159,7 @@ struct sock_common { | |||
159 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 159 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
160 | * @sk_lock: synchronizer | 160 | * @sk_lock: synchronizer |
161 | * @sk_rcvbuf: size of receive buffer in bytes | 161 | * @sk_rcvbuf: size of receive buffer in bytes |
162 | * @sk_sleep: sock wait queue | 162 | * @sk_wq: sock wait queue and async head |
163 | * @sk_dst_cache: destination cache | 163 | * @sk_dst_cache: destination cache |
164 | * @sk_dst_lock: destination cache lock | 164 | * @sk_dst_lock: destination cache lock |
165 | * @sk_policy: flow policy | 165 | * @sk_policy: flow policy |
@@ -257,7 +257,7 @@ struct sock { | |||
257 | struct sk_buff *tail; | 257 | struct sk_buff *tail; |
258 | int len; | 258 | int len; |
259 | } sk_backlog; | 259 | } sk_backlog; |
260 | wait_queue_head_t *sk_sleep; | 260 | struct socket_wq *sk_wq; |
261 | struct dst_entry *sk_dst_cache; | 261 | struct dst_entry *sk_dst_cache; |
262 | #ifdef CONFIG_XFRM | 262 | #ifdef CONFIG_XFRM |
263 | struct xfrm_policy *sk_policy[2]; | 263 | struct xfrm_policy *sk_policy[2]; |
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) | |||
1219 | 1219 | ||
1220 | static inline wait_queue_head_t *sk_sleep(struct sock *sk) | 1220 | static inline wait_queue_head_t *sk_sleep(struct sock *sk) |
1221 | { | 1221 | { |
1222 | return sk->sk_sleep; | 1222 | return &sk->sk_wq->wait; |
1223 | } | 1223 | } |
1224 | /* Detach socket from process context. | 1224 | /* Detach socket from process context. |
1225 | * Announce socket dead, detach it from wait queue and inode. | 1225 | * Announce socket dead, detach it from wait queue and inode. |
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk) | |||
1233 | write_lock_bh(&sk->sk_callback_lock); | 1233 | write_lock_bh(&sk->sk_callback_lock); |
1234 | sock_set_flag(sk, SOCK_DEAD); | 1234 | sock_set_flag(sk, SOCK_DEAD); |
1235 | sk_set_socket(sk, NULL); | 1235 | sk_set_socket(sk, NULL); |
1236 | sk->sk_sleep = NULL; | 1236 | sk->sk_wq = NULL; |
1237 | write_unlock_bh(&sk->sk_callback_lock); | 1237 | write_unlock_bh(&sk->sk_callback_lock); |
1238 | } | 1238 | } |
1239 | 1239 | ||
1240 | static inline void sock_graft(struct sock *sk, struct socket *parent) | 1240 | static inline void sock_graft(struct sock *sk, struct socket *parent) |
1241 | { | 1241 | { |
1242 | write_lock_bh(&sk->sk_callback_lock); | 1242 | write_lock_bh(&sk->sk_callback_lock); |
1243 | sk->sk_sleep = &parent->wait; | 1243 | rcu_assign_pointer(sk->sk_wq, parent->wq); |
1244 | parent->sk = sk; | 1244 | parent->sk = sk; |
1245 | sk_set_socket(sk, parent); | 1245 | sk_set_socket(sk, parent); |
1246 | security_sock_graft(sk, parent); | 1246 | security_sock_graft(sk, parent); |
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | /** | 1394 | /** |
1395 | * sk_has_sleeper - check if there are any waiting processes | 1395 | * wq_has_sleeper - check if there are any waiting processes |
1396 | * @sk: socket | 1396 | * @sk: struct socket_wq |
1397 | * | 1397 | * |
1398 | * Returns true if socket has waiting processes | 1398 | * Returns true if socket_wq has waiting processes |
1399 | * | 1399 | * |
1400 | * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory | 1400 | * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory |
1401 | * barrier call. They were added due to the race found within the tcp code. | 1401 | * barrier call. They were added due to the race found within the tcp code. |
1402 | * | 1402 | * |
1403 | * Consider following tcp code paths: | 1403 | * Consider following tcp code paths: |
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
1410 | * ... ... | 1410 | * ... ... |
1411 | * tp->rcv_nxt check sock_def_readable | 1411 | * tp->rcv_nxt check sock_def_readable |
1412 | * ... { | 1412 | * ... { |
1413 | * schedule ... | 1413 | * schedule rcu_read_lock(); |
1414 | * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) | 1414 | * wq = rcu_dereference(sk->sk_wq); |
1415 | * wake_up_interruptible(sk_sleep(sk)) | 1415 | * if (wq && waitqueue_active(&wq->wait)) |
1416 | * wake_up_interruptible(&wq->wait) | ||
1416 | * ... | 1417 | * ... |
1417 | * } | 1418 | * } |
1418 | * | 1419 | * |
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
1421 | * could then endup calling schedule and sleep forever if there are no more | 1422 | * could then endup calling schedule and sleep forever if there are no more |
1422 | * data on the socket. | 1423 | * data on the socket. |
1423 | * | 1424 | * |
1424 | * The sk_has_sleeper is always called right after a call to read_lock, so we | ||
1425 | * can use smp_mb__after_lock barrier. | ||
1426 | */ | 1425 | */ |
1427 | static inline int sk_has_sleeper(struct sock *sk) | 1426 | static inline bool wq_has_sleeper(struct socket_wq *wq) |
1428 | { | 1427 | { |
1428 | |||
1429 | /* | 1429 | /* |
1430 | * We need to be sure we are in sync with the | 1430 | * We need to be sure we are in sync with the |
1431 | * add_wait_queue modifications to the wait queue. | 1431 | * add_wait_queue modifications to the wait queue. |
1432 | * | 1432 | * |
1433 | * This memory barrier is paired in the sock_poll_wait. | 1433 | * This memory barrier is paired in the sock_poll_wait. |
1434 | */ | 1434 | */ |
1435 | smp_mb__after_lock(); | 1435 | smp_mb(); |
1436 | return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); | 1436 | return wq && waitqueue_active(&wq->wait); |
1437 | } | 1437 | } |
1438 | 1438 | ||
1439 | /** | 1439 | /** |
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk) | |||
1442 | * @wait_address: socket wait queue | 1442 | * @wait_address: socket wait queue |
1443 | * @p: poll_table | 1443 | * @p: poll_table |
1444 | * | 1444 | * |
1445 | * See the comments in the sk_has_sleeper function. | 1445 | * See the comments in the wq_has_sleeper function. |
1446 | */ | 1446 | */ |
1447 | static inline void sock_poll_wait(struct file *filp, | 1447 | static inline void sock_poll_wait(struct file *filp, |
1448 | wait_queue_head_t *wait_address, poll_table *p) | 1448 | wait_queue_head_t *wait_address, poll_table *p) |
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp, | |||
1453 | * We need to be sure we are in sync with the | 1453 | * We need to be sure we are in sync with the |
1454 | * socket flags modification. | 1454 | * socket flags modification. |
1455 | * | 1455 | * |
1456 | * This memory barrier is paired in the sk_has_sleeper. | 1456 | * This memory barrier is paired in the wq_has_sleeper. |
1457 | */ | 1457 | */ |
1458 | smp_mb(); | 1458 | smp_mb(); |
1459 | } | 1459 | } |