diff options
| author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
| commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
| tree | 063efaae3758402b84f056438b704d1de68f7837 /include/net | |
| parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) | |
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.
RCU conversion is pretty much needed:
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct
socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to eventually wakeup tasks.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions:
macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They dont need rcu freeing.
Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example...).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
| -rw-r--r-- | include/net/af_unix.h | 20 | ||||
| -rw-r--r-- | include/net/sock.h | 38 |
2 files changed, 30 insertions, 28 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1614d78c60ed..20725e213aee 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h | |||
| @@ -30,7 +30,7 @@ struct unix_skb_parms { | |||
| 30 | #endif | 30 | #endif |
| 31 | }; | 31 | }; |
| 32 | 32 | ||
| 33 | #define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) | 33 | #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) |
| 34 | #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) | 34 | #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) |
| 35 | #define UNIXSID(skb) (&UNIXCB((skb)).secid) | 35 | #define UNIXSID(skb) (&UNIXCB((skb)).secid) |
| 36 | 36 | ||
| @@ -45,21 +45,23 @@ struct unix_skb_parms { | |||
| 45 | struct unix_sock { | 45 | struct unix_sock { |
| 46 | /* WARNING: sk has to be the first member */ | 46 | /* WARNING: sk has to be the first member */ |
| 47 | struct sock sk; | 47 | struct sock sk; |
| 48 | struct unix_address *addr; | 48 | struct unix_address *addr; |
| 49 | struct dentry *dentry; | 49 | struct dentry *dentry; |
| 50 | struct vfsmount *mnt; | 50 | struct vfsmount *mnt; |
| 51 | struct mutex readlock; | 51 | struct mutex readlock; |
| 52 | struct sock *peer; | 52 | struct sock *peer; |
| 53 | struct sock *other; | 53 | struct sock *other; |
| 54 | struct list_head link; | 54 | struct list_head link; |
| 55 | atomic_long_t inflight; | 55 | atomic_long_t inflight; |
| 56 | spinlock_t lock; | 56 | spinlock_t lock; |
| 57 | unsigned int gc_candidate : 1; | 57 | unsigned int gc_candidate : 1; |
| 58 | unsigned int gc_maybe_cycle : 1; | 58 | unsigned int gc_maybe_cycle : 1; |
| 59 | wait_queue_head_t peer_wait; | 59 | struct socket_wq peer_wq; |
| 60 | }; | 60 | }; |
| 61 | #define unix_sk(__sk) ((struct unix_sock *)__sk) | 61 | #define unix_sk(__sk) ((struct unix_sock *)__sk) |
| 62 | 62 | ||
| 63 | #define peer_wait peer_wq.wait | ||
| 64 | |||
| 63 | #ifdef CONFIG_SYSCTL | 65 | #ifdef CONFIG_SYSCTL |
| 64 | extern int unix_sysctl_register(struct net *net); | 66 | extern int unix_sysctl_register(struct net *net); |
| 65 | extern void unix_sysctl_unregister(struct net *net); | 67 | extern void unix_sysctl_unregister(struct net *net); |
diff --git a/include/net/sock.h b/include/net/sock.h index e1777db5b9ab..cc7f91ec972c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
| @@ -159,7 +159,7 @@ struct sock_common { | |||
| 159 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 159 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
| 160 | * @sk_lock: synchronizer | 160 | * @sk_lock: synchronizer |
| 161 | * @sk_rcvbuf: size of receive buffer in bytes | 161 | * @sk_rcvbuf: size of receive buffer in bytes |
| 162 | * @sk_sleep: sock wait queue | 162 | * @sk_wq: sock wait queue and async head |
| 163 | * @sk_dst_cache: destination cache | 163 | * @sk_dst_cache: destination cache |
| 164 | * @sk_dst_lock: destination cache lock | 164 | * @sk_dst_lock: destination cache lock |
| 165 | * @sk_policy: flow policy | 165 | * @sk_policy: flow policy |
| @@ -257,7 +257,7 @@ struct sock { | |||
| 257 | struct sk_buff *tail; | 257 | struct sk_buff *tail; |
| 258 | int len; | 258 | int len; |
| 259 | } sk_backlog; | 259 | } sk_backlog; |
| 260 | wait_queue_head_t *sk_sleep; | 260 | struct socket_wq *sk_wq; |
| 261 | struct dst_entry *sk_dst_cache; | 261 | struct dst_entry *sk_dst_cache; |
| 262 | #ifdef CONFIG_XFRM | 262 | #ifdef CONFIG_XFRM |
| 263 | struct xfrm_policy *sk_policy[2]; | 263 | struct xfrm_policy *sk_policy[2]; |
| @@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) | |||
| 1219 | 1219 | ||
| 1220 | static inline wait_queue_head_t *sk_sleep(struct sock *sk) | 1220 | static inline wait_queue_head_t *sk_sleep(struct sock *sk) |
| 1221 | { | 1221 | { |
| 1222 | return sk->sk_sleep; | 1222 | return &sk->sk_wq->wait; |
| 1223 | } | 1223 | } |
| 1224 | /* Detach socket from process context. | 1224 | /* Detach socket from process context. |
| 1225 | * Announce socket dead, detach it from wait queue and inode. | 1225 | * Announce socket dead, detach it from wait queue and inode. |
| @@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk) | |||
| 1233 | write_lock_bh(&sk->sk_callback_lock); | 1233 | write_lock_bh(&sk->sk_callback_lock); |
| 1234 | sock_set_flag(sk, SOCK_DEAD); | 1234 | sock_set_flag(sk, SOCK_DEAD); |
| 1235 | sk_set_socket(sk, NULL); | 1235 | sk_set_socket(sk, NULL); |
| 1236 | sk->sk_sleep = NULL; | 1236 | sk->sk_wq = NULL; |
| 1237 | write_unlock_bh(&sk->sk_callback_lock); | 1237 | write_unlock_bh(&sk->sk_callback_lock); |
| 1238 | } | 1238 | } |
| 1239 | 1239 | ||
| 1240 | static inline void sock_graft(struct sock *sk, struct socket *parent) | 1240 | static inline void sock_graft(struct sock *sk, struct socket *parent) |
| 1241 | { | 1241 | { |
| 1242 | write_lock_bh(&sk->sk_callback_lock); | 1242 | write_lock_bh(&sk->sk_callback_lock); |
| 1243 | sk->sk_sleep = &parent->wait; | 1243 | rcu_assign_pointer(sk->sk_wq, parent->wq); |
| 1244 | parent->sk = sk; | 1244 | parent->sk = sk; |
| 1245 | sk_set_socket(sk, parent); | 1245 | sk_set_socket(sk, parent); |
| 1246 | security_sock_graft(sk, parent); | 1246 | security_sock_graft(sk, parent); |
| @@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
| 1392 | } | 1392 | } |
| 1393 | 1393 | ||
| 1394 | /** | 1394 | /** |
| 1395 | * sk_has_sleeper - check if there are any waiting processes | 1395 | * wq_has_sleeper - check if there are any waiting processes |
| 1396 | * @sk: socket | 1396 | * @sk: struct socket_wq |
| 1397 | * | 1397 | * |
| 1398 | * Returns true if socket has waiting processes | 1398 | * Returns true if socket_wq has waiting processes |
| 1399 | * | 1399 | * |
| 1400 | * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory | 1400 | * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory |
| 1401 | * barrier call. They were added due to the race found within the tcp code. | 1401 | * barrier call. They were added due to the race found within the tcp code. |
| 1402 | * | 1402 | * |
| 1403 | * Consider following tcp code paths: | 1403 | * Consider following tcp code paths: |
| @@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
| 1410 | * ... ... | 1410 | * ... ... |
| 1411 | * tp->rcv_nxt check sock_def_readable | 1411 | * tp->rcv_nxt check sock_def_readable |
| 1412 | * ... { | 1412 | * ... { |
| 1413 | * schedule ... | 1413 | * schedule rcu_read_lock(); |
| 1414 | * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) | 1414 | * wq = rcu_dereference(sk->sk_wq); |
| 1415 | * wake_up_interruptible(sk_sleep(sk)) | 1415 | * if (wq && waitqueue_active(&wq->wait)) |
| 1416 | * wake_up_interruptible(&wq->wait) | ||
| 1416 | * ... | 1417 | * ... |
| 1417 | * } | 1418 | * } |
| 1418 | * | 1419 | * |
| @@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
| 1421 | * could then endup calling schedule and sleep forever if there are no more | 1422 | * could then endup calling schedule and sleep forever if there are no more |
| 1422 | * data on the socket. | 1423 | * data on the socket. |
| 1423 | * | 1424 | * |
| 1424 | * The sk_has_sleeper is always called right after a call to read_lock, so we | ||
| 1425 | * can use smp_mb__after_lock barrier. | ||
| 1426 | */ | 1425 | */ |
| 1427 | static inline int sk_has_sleeper(struct sock *sk) | 1426 | static inline bool wq_has_sleeper(struct socket_wq *wq) |
| 1428 | { | 1427 | { |
| 1428 | |||
| 1429 | /* | 1429 | /* |
| 1430 | * We need to be sure we are in sync with the | 1430 | * We need to be sure we are in sync with the |
| 1431 | * add_wait_queue modifications to the wait queue. | 1431 | * add_wait_queue modifications to the wait queue. |
| 1432 | * | 1432 | * |
| 1433 | * This memory barrier is paired in the sock_poll_wait. | 1433 | * This memory barrier is paired in the sock_poll_wait. |
| 1434 | */ | 1434 | */ |
| 1435 | smp_mb__after_lock(); | 1435 | smp_mb(); |
| 1436 | return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); | 1436 | return wq && waitqueue_active(&wq->wait); |
| 1437 | } | 1437 | } |
| 1438 | 1438 | ||
| 1439 | /** | 1439 | /** |
| @@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk) | |||
| 1442 | * @wait_address: socket wait queue | 1442 | * @wait_address: socket wait queue |
| 1443 | * @p: poll_table | 1443 | * @p: poll_table |
| 1444 | * | 1444 | * |
| 1445 | * See the comments in the sk_has_sleeper function. | 1445 | * See the comments in the wq_has_sleeper function. |
| 1446 | */ | 1446 | */ |
| 1447 | static inline void sock_poll_wait(struct file *filp, | 1447 | static inline void sock_poll_wait(struct file *filp, |
| 1448 | wait_queue_head_t *wait_address, poll_table *p) | 1448 | wait_queue_head_t *wait_address, poll_table *p) |
| @@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp, | |||
| 1453 | * We need to be sure we are in sync with the | 1453 | * We need to be sure we are in sync with the |
| 1454 | * socket flags modification. | 1454 | * socket flags modification. |
| 1455 | * | 1455 | * |
| 1456 | * This memory barrier is paired in the sk_has_sleeper. | 1456 | * This memory barrier is paired in the wq_has_sleeper. |
| 1457 | */ | 1457 | */ |
| 1458 | smp_mb(); | 1458 | smp_mb(); |
| 1459 | } | 1459 | } |
