diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /include/net/sock.h | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) |
net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we
need two atomic operations (and the associated cache-line dirtying) per
incoming packet.
An RCU conversion is pretty much needed:
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct
socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to eventually wakeup tasks.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They don't need RCU freeing.
Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 38 |
1 files changed, 19 insertions, 19 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index e1777db5b9ab..cc7f91ec972c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -159,7 +159,7 @@ struct sock_common { | |||
159 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 159 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
160 | * @sk_lock: synchronizer | 160 | * @sk_lock: synchronizer |
161 | * @sk_rcvbuf: size of receive buffer in bytes | 161 | * @sk_rcvbuf: size of receive buffer in bytes |
162 | * @sk_sleep: sock wait queue | 162 | * @sk_wq: sock wait queue and async head |
163 | * @sk_dst_cache: destination cache | 163 | * @sk_dst_cache: destination cache |
164 | * @sk_dst_lock: destination cache lock | 164 | * @sk_dst_lock: destination cache lock |
165 | * @sk_policy: flow policy | 165 | * @sk_policy: flow policy |
@@ -257,7 +257,7 @@ struct sock { | |||
257 | struct sk_buff *tail; | 257 | struct sk_buff *tail; |
258 | int len; | 258 | int len; |
259 | } sk_backlog; | 259 | } sk_backlog; |
260 | wait_queue_head_t *sk_sleep; | 260 | struct socket_wq *sk_wq; |
261 | struct dst_entry *sk_dst_cache; | 261 | struct dst_entry *sk_dst_cache; |
262 | #ifdef CONFIG_XFRM | 262 | #ifdef CONFIG_XFRM |
263 | struct xfrm_policy *sk_policy[2]; | 263 | struct xfrm_policy *sk_policy[2]; |
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) | |||
1219 | 1219 | ||
1220 | static inline wait_queue_head_t *sk_sleep(struct sock *sk) | 1220 | static inline wait_queue_head_t *sk_sleep(struct sock *sk) |
1221 | { | 1221 | { |
1222 | return sk->sk_sleep; | 1222 | return &sk->sk_wq->wait; |
1223 | } | 1223 | } |
1224 | /* Detach socket from process context. | 1224 | /* Detach socket from process context. |
1225 | * Announce socket dead, detach it from wait queue and inode. | 1225 | * Announce socket dead, detach it from wait queue and inode. |
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk) | |||
1233 | write_lock_bh(&sk->sk_callback_lock); | 1233 | write_lock_bh(&sk->sk_callback_lock); |
1234 | sock_set_flag(sk, SOCK_DEAD); | 1234 | sock_set_flag(sk, SOCK_DEAD); |
1235 | sk_set_socket(sk, NULL); | 1235 | sk_set_socket(sk, NULL); |
1236 | sk->sk_sleep = NULL; | 1236 | sk->sk_wq = NULL; |
1237 | write_unlock_bh(&sk->sk_callback_lock); | 1237 | write_unlock_bh(&sk->sk_callback_lock); |
1238 | } | 1238 | } |
1239 | 1239 | ||
1240 | static inline void sock_graft(struct sock *sk, struct socket *parent) | 1240 | static inline void sock_graft(struct sock *sk, struct socket *parent) |
1241 | { | 1241 | { |
1242 | write_lock_bh(&sk->sk_callback_lock); | 1242 | write_lock_bh(&sk->sk_callback_lock); |
1243 | sk->sk_sleep = &parent->wait; | 1243 | rcu_assign_pointer(sk->sk_wq, parent->wq); |
1244 | parent->sk = sk; | 1244 | parent->sk = sk; |
1245 | sk_set_socket(sk, parent); | 1245 | sk_set_socket(sk, parent); |
1246 | security_sock_graft(sk, parent); | 1246 | security_sock_graft(sk, parent); |
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | /** | 1394 | /** |
1395 | * sk_has_sleeper - check if there are any waiting processes | 1395 | * wq_has_sleeper - check if there are any waiting processes |
1396 | * @sk: socket | 1396 | * @sk: struct socket_wq |
1397 | * | 1397 | * |
1398 | * Returns true if socket has waiting processes | 1398 | * Returns true if socket_wq has waiting processes |
1399 | * | 1399 | * |
1400 | * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory | 1400 | * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory |
1401 | * barrier call. They were added due to the race found within the tcp code. | 1401 | * barrier call. They were added due to the race found within the tcp code. |
1402 | * | 1402 | * |
1403 | * Consider following tcp code paths: | 1403 | * Consider following tcp code paths: |
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
1410 | * ... ... | 1410 | * ... ... |
1411 | * tp->rcv_nxt check sock_def_readable | 1411 | * tp->rcv_nxt check sock_def_readable |
1412 | * ... { | 1412 | * ... { |
1413 | * schedule ... | 1413 | * schedule rcu_read_lock(); |
1414 | * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) | 1414 | * wq = rcu_dereference(sk->sk_wq); |
1415 | * wake_up_interruptible(sk_sleep(sk)) | 1415 | * if (wq && waitqueue_active(&wq->wait)) |
1416 | * wake_up_interruptible(&wq->wait) | ||
1416 | * ... | 1417 | * ... |
1417 | * } | 1418 | * } |
1418 | * | 1419 | * |
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
1421 | * could then endup calling schedule and sleep forever if there are no more | 1422 | * could then endup calling schedule and sleep forever if there are no more |
1422 | * data on the socket. | 1423 | * data on the socket. |
1423 | * | 1424 | * |
1424 | * The sk_has_sleeper is always called right after a call to read_lock, so we | ||
1425 | * can use smp_mb__after_lock barrier. | ||
1426 | */ | 1425 | */ |
1427 | static inline int sk_has_sleeper(struct sock *sk) | 1426 | static inline bool wq_has_sleeper(struct socket_wq *wq) |
1428 | { | 1427 | { |
1428 | |||
1429 | /* | 1429 | /* |
1430 | * We need to be sure we are in sync with the | 1430 | * We need to be sure we are in sync with the |
1431 | * add_wait_queue modifications to the wait queue. | 1431 | * add_wait_queue modifications to the wait queue. |
1432 | * | 1432 | * |
1433 | * This memory barrier is paired in the sock_poll_wait. | 1433 | * This memory barrier is paired in the sock_poll_wait. |
1434 | */ | 1434 | */ |
1435 | smp_mb__after_lock(); | 1435 | smp_mb(); |
1436 | return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); | 1436 | return wq && waitqueue_active(&wq->wait); |
1437 | } | 1437 | } |
1438 | 1438 | ||
1439 | /** | 1439 | /** |
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk) | |||
1442 | * @wait_address: socket wait queue | 1442 | * @wait_address: socket wait queue |
1443 | * @p: poll_table | 1443 | * @p: poll_table |
1444 | * | 1444 | * |
1445 | * See the comments in the sk_has_sleeper function. | 1445 | * See the comments in the wq_has_sleeper function. |
1446 | */ | 1446 | */ |
1447 | static inline void sock_poll_wait(struct file *filp, | 1447 | static inline void sock_poll_wait(struct file *filp, |
1448 | wait_queue_head_t *wait_address, poll_table *p) | 1448 | wait_queue_head_t *wait_address, poll_table *p) |
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp, | |||
1453 | * We need to be sure we are in sync with the | 1453 | * We need to be sure we are in sync with the |
1454 | * socket flags modification. | 1454 | * socket flags modification. |
1455 | * | 1455 | * |
1456 | * This memory barrier is paired in the sk_has_sleeper. | 1456 | * This memory barrier is paired in the wq_has_sleeper. |
1457 | */ | 1457 | */ |
1458 | smp_mb(); | 1458 | smp_mb(); |
1459 | } | 1459 | } |