diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /net/socket.c | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) |
net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we
need two atomic operations (and the associated dirtying) per incoming
packet.
An RCU conversion is therefore pretty much needed:
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
"struct socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside
an rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to wake up tasks only when there are sleepers.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c and af_unix use an integrated "struct socket_wq"
instead of a dynamically allocated one. They don't need RCU freeing.
Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/socket.c')
-rw-r--r-- | net/socket.c | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/net/socket.c b/net/socket.c index cb7c1f6c0d6e..dae8c6b84a09 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb) | |||
252 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); | 252 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); |
253 | if (!ei) | 253 | if (!ei) |
254 | return NULL; | 254 | return NULL; |
255 | init_waitqueue_head(&ei->socket.wait); | 255 | ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); |
256 | if (!ei->socket.wq) { | ||
257 | kmem_cache_free(sock_inode_cachep, ei); | ||
258 | return NULL; | ||
259 | } | ||
260 | init_waitqueue_head(&ei->socket.wq->wait); | ||
261 | ei->socket.wq->fasync_list = NULL; | ||
256 | 262 | ||
257 | ei->socket.fasync_list = NULL; | ||
258 | ei->socket.state = SS_UNCONNECTED; | 263 | ei->socket.state = SS_UNCONNECTED; |
259 | ei->socket.flags = 0; | 264 | ei->socket.flags = 0; |
260 | ei->socket.ops = NULL; | 265 | ei->socket.ops = NULL; |
@@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) | |||
264 | return &ei->vfs_inode; | 269 | return &ei->vfs_inode; |
265 | } | 270 | } |
266 | 271 | ||
272 | |||
273 | static void wq_free_rcu(struct rcu_head *head) | ||
274 | { | ||
275 | struct socket_wq *wq = container_of(head, struct socket_wq, rcu); | ||
276 | |||
277 | kfree(wq); | ||
278 | } | ||
279 | |||
267 | static void sock_destroy_inode(struct inode *inode) | 280 | static void sock_destroy_inode(struct inode *inode) |
268 | { | 281 | { |
269 | kmem_cache_free(sock_inode_cachep, | 282 | struct socket_alloc *ei; |
270 | container_of(inode, struct socket_alloc, vfs_inode)); | 283 | |
284 | ei = container_of(inode, struct socket_alloc, vfs_inode); | ||
285 | call_rcu(&ei->socket.wq->rcu, wq_free_rcu); | ||
286 | kmem_cache_free(sock_inode_cachep, ei); | ||
271 | } | 287 | } |
272 | 288 | ||
273 | static void init_once(void *foo) | 289 | static void init_once(void *foo) |
@@ -513,7 +529,7 @@ void sock_release(struct socket *sock) | |||
513 | module_put(owner); | 529 | module_put(owner); |
514 | } | 530 | } |
515 | 531 | ||
516 | if (sock->fasync_list) | 532 | if (sock->wq->fasync_list) |
517 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); | 533 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); |
518 | 534 | ||
519 | percpu_sub(sockets_in_use, 1); | 535 | percpu_sub(sockets_in_use, 1); |
@@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on) | |||
1080 | 1096 | ||
1081 | lock_sock(sk); | 1097 | lock_sock(sk); |
1082 | 1098 | ||
1083 | fasync_helper(fd, filp, on, &sock->fasync_list); | 1099 | fasync_helper(fd, filp, on, &sock->wq->fasync_list); |
1084 | 1100 | ||
1085 | if (!sock->fasync_list) | 1101 | if (!sock->wq->fasync_list) |
1086 | sock_reset_flag(sk, SOCK_FASYNC); | 1102 | sock_reset_flag(sk, SOCK_FASYNC); |
1087 | else | 1103 | else |
1088 | sock_set_flag(sk, SOCK_FASYNC); | 1104 | sock_set_flag(sk, SOCK_FASYNC); |
@@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on) | |||
1091 | return 0; | 1107 | return 0; |
1092 | } | 1108 | } |
1093 | 1109 | ||
1094 | /* This function may be called only under socket lock or callback_lock */ | 1110 | /* This function may be called only under socket lock or callback_lock or rcu_lock */ |
1095 | 1111 | ||
1096 | int sock_wake_async(struct socket *sock, int how, int band) | 1112 | int sock_wake_async(struct socket *sock, int how, int band) |
1097 | { | 1113 | { |
1098 | if (!sock || !sock->fasync_list) | 1114 | struct socket_wq *wq; |
1115 | |||
1116 | if (!sock) | ||
1099 | return -1; | 1117 | return -1; |
1118 | rcu_read_lock(); | ||
1119 | wq = rcu_dereference(sock->wq); | ||
1120 | if (!wq || !wq->fasync_list) { | ||
1121 | rcu_read_unlock(); | ||
1122 | return -1; | ||
1123 | } | ||
1100 | switch (how) { | 1124 | switch (how) { |
1101 | case SOCK_WAKE_WAITD: | 1125 | case SOCK_WAKE_WAITD: |
1102 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) | 1126 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) |
@@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band) | |||
1108 | /* fall through */ | 1132 | /* fall through */ |
1109 | case SOCK_WAKE_IO: | 1133 | case SOCK_WAKE_IO: |
1110 | call_kill: | 1134 | call_kill: |
1111 | kill_fasync(&sock->fasync_list, SIGIO, band); | 1135 | kill_fasync(&wq->fasync_list, SIGIO, band); |
1112 | break; | 1136 | break; |
1113 | case SOCK_WAKE_URG: | 1137 | case SOCK_WAKE_URG: |
1114 | kill_fasync(&sock->fasync_list, SIGURG, band); | 1138 | kill_fasync(&wq->fasync_list, SIGURG, band); |
1115 | } | 1139 | } |
1140 | rcu_read_unlock(); | ||
1116 | return 0; | 1141 | return 0; |
1117 | } | 1142 | } |
1118 | 1143 | ||