aboutsummaryrefslogtreecommitdiffstats
path: root/net/socket.c
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-04-29 07:01:49 -0400
committerDavid S. Miller <davem@davemloft.net>2010-05-01 18:00:15 -0400
commit43815482370c510c569fd18edb57afcb0fa8cab6 (patch)
tree063efaae3758402b84f056438b704d1de68f7837 /net/socket.c
parent83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff)
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/socket.c')
-rw-r--r--net/socket.c47
1 files changed, 36 insertions, 11 deletions
diff --git a/net/socket.c b/net/socket.c
index cb7c1f6c0d6e..dae8c6b84a09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
253 if (!ei) 253 if (!ei)
254 return NULL; 254 return NULL;
255 init_waitqueue_head(&ei->socket.wait); 255 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
256 if (!ei->socket.wq) {
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
260 init_waitqueue_head(&ei->socket.wq->wait);
261 ei->socket.wq->fasync_list = NULL;
256 262
257 ei->socket.fasync_list = NULL;
258 ei->socket.state = SS_UNCONNECTED; 263 ei->socket.state = SS_UNCONNECTED;
259 ei->socket.flags = 0; 264 ei->socket.flags = 0;
260 ei->socket.ops = NULL; 265 ei->socket.ops = NULL;
@@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
264 return &ei->vfs_inode; 269 return &ei->vfs_inode;
265} 270}
266 271
272
273static void wq_free_rcu(struct rcu_head *head)
274{
275 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
276
277 kfree(wq);
278}
279
267static void sock_destroy_inode(struct inode *inode) 280static void sock_destroy_inode(struct inode *inode)
268{ 281{
269 kmem_cache_free(sock_inode_cachep, 282 struct socket_alloc *ei;
270 container_of(inode, struct socket_alloc, vfs_inode)); 283
284 ei = container_of(inode, struct socket_alloc, vfs_inode);
285 call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
286 kmem_cache_free(sock_inode_cachep, ei);
271} 287}
272 288
273static void init_once(void *foo) 289static void init_once(void *foo)
@@ -513,7 +529,7 @@ void sock_release(struct socket *sock)
513 module_put(owner); 529 module_put(owner);
514 } 530 }
515 531
516 if (sock->fasync_list) 532 if (sock->wq->fasync_list)
517 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 533 printk(KERN_ERR "sock_release: fasync list not empty!\n");
518 534
519 percpu_sub(sockets_in_use, 1); 535 percpu_sub(sockets_in_use, 1);
@@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on)
1080 1096
1081 lock_sock(sk); 1097 lock_sock(sk);
1082 1098
1083 fasync_helper(fd, filp, on, &sock->fasync_list); 1099 fasync_helper(fd, filp, on, &sock->wq->fasync_list);
1084 1100
1085 if (!sock->fasync_list) 1101 if (!sock->wq->fasync_list)
1086 sock_reset_flag(sk, SOCK_FASYNC); 1102 sock_reset_flag(sk, SOCK_FASYNC);
1087 else 1103 else
1088 sock_set_flag(sk, SOCK_FASYNC); 1104 sock_set_flag(sk, SOCK_FASYNC);
@@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
1091 return 0; 1107 return 0;
1092} 1108}
1093 1109
1094/* This function may be called only under socket lock or callback_lock */ 1110/* This function may be called only under socket lock or callback_lock or rcu_lock */
1095 1111
1096int sock_wake_async(struct socket *sock, int how, int band) 1112int sock_wake_async(struct socket *sock, int how, int band)
1097{ 1113{
1098 if (!sock || !sock->fasync_list) 1114 struct socket_wq *wq;
1115
1116 if (!sock)
1099 return -1; 1117 return -1;
1118 rcu_read_lock();
1119 wq = rcu_dereference(sock->wq);
1120 if (!wq || !wq->fasync_list) {
1121 rcu_read_unlock();
1122 return -1;
1123 }
1100 switch (how) { 1124 switch (how) {
1101 case SOCK_WAKE_WAITD: 1125 case SOCK_WAKE_WAITD:
1102 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1126 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band)
1108 /* fall through */ 1132 /* fall through */
1109 case SOCK_WAKE_IO: 1133 case SOCK_WAKE_IO:
1110call_kill: 1134call_kill:
1111 kill_fasync(&sock->fasync_list, SIGIO, band); 1135 kill_fasync(&wq->fasync_list, SIGIO, band);
1112 break; 1136 break;
1113 case SOCK_WAKE_URG: 1137 case SOCK_WAKE_URG:
1114 kill_fasync(&sock->fasync_list, SIGURG, band); 1138 kill_fasync(&wq->fasync_list, SIGURG, band);
1115 } 1139 }
1140 rcu_read_unlock();
1116 return 0; 1141 return 0;
1117} 1142}
1118 1143