diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /drivers/net | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) |
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.
RCU conversion is pretty much needed:
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Change the sk_sleep pointer in "struct sock" to sk_wq, a pointer to "struct
socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to eventually wakeup tasks.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c, af_unix use an integrated "struct socket_wq"
instead of dynamically allocated ones. They don't need rcu freeing.
Some cleanups or followups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example...).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/macvtap.c | 13 | ||||
-rw-r--r-- | drivers/net/tun.c | 21 |
2 files changed, 21 insertions, 13 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d97e1fd234ba..1c4110df343e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -37,6 +37,7 @@ | |||
37 | struct macvtap_queue { | 37 | struct macvtap_queue { |
38 | struct sock sk; | 38 | struct sock sk; |
39 | struct socket sock; | 39 | struct socket sock; |
40 | struct socket_wq wq; | ||
40 | struct macvlan_dev *vlan; | 41 | struct macvlan_dev *vlan; |
41 | struct file *file; | 42 | struct file *file; |
42 | unsigned int flags; | 43 | unsigned int flags; |
@@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { | |||
242 | 243 | ||
243 | static void macvtap_sock_write_space(struct sock *sk) | 244 | static void macvtap_sock_write_space(struct sock *sk) |
244 | { | 245 | { |
246 | wait_queue_head_t *wqueue; | ||
247 | |||
245 | if (!sock_writeable(sk) || | 248 | if (!sock_writeable(sk) || |
246 | !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) | 249 | !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) |
247 | return; | 250 | return; |
248 | 251 | ||
249 | if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) | 252 | wqueue = sk_sleep(sk); |
250 | wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); | 253 | if (wqueue && waitqueue_active(wqueue)) |
254 | wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); | ||
251 | } | 255 | } |
252 | 256 | ||
253 | static int macvtap_open(struct inode *inode, struct file *file) | 257 | static int macvtap_open(struct inode *inode, struct file *file) |
@@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file) | |||
272 | if (!q) | 276 | if (!q) |
273 | goto out; | 277 | goto out; |
274 | 278 | ||
275 | init_waitqueue_head(&q->sock.wait); | 279 | q->sock.wq = &q->wq; |
280 | init_waitqueue_head(&q->wq.wait); | ||
276 | q->sock.type = SOCK_RAW; | 281 | q->sock.type = SOCK_RAW; |
277 | q->sock.state = SS_CONNECTED; | 282 | q->sock.state = SS_CONNECTED; |
278 | q->sock.file = file; | 283 | q->sock.file = file; |
@@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) | |||
308 | goto out; | 313 | goto out; |
309 | 314 | ||
310 | mask = 0; | 315 | mask = 0; |
311 | poll_wait(file, &q->sock.wait, wait); | 316 | poll_wait(file, &q->wq.wait, wait); |
312 | 317 | ||
313 | if (!skb_queue_empty(&q->sk.sk_receive_queue)) | 318 | if (!skb_queue_empty(&q->sk.sk_receive_queue)) |
314 | mask |= POLLIN | POLLRDNORM; | 319 | mask |= POLLIN | POLLRDNORM; |
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 20a17938c62b..e525a6cf5587 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -109,7 +109,7 @@ struct tun_struct { | |||
109 | 109 | ||
110 | struct tap_filter txflt; | 110 | struct tap_filter txflt; |
111 | struct socket socket; | 111 | struct socket socket; |
112 | 112 | struct socket_wq wq; | |
113 | #ifdef TUN_DEBUG | 113 | #ifdef TUN_DEBUG |
114 | int debug; | 114 | int debug; |
115 | #endif | 115 | #endif |
@@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev) | |||
323 | /* Inform the methods they need to stop using the dev. | 323 | /* Inform the methods they need to stop using the dev. |
324 | */ | 324 | */ |
325 | if (tfile) { | 325 | if (tfile) { |
326 | wake_up_all(&tun->socket.wait); | 326 | wake_up_all(&tun->wq.wait); |
327 | if (atomic_dec_and_test(&tfile->count)) | 327 | if (atomic_dec_and_test(&tfile->count)) |
328 | __tun_detach(tun); | 328 | __tun_detach(tun); |
329 | } | 329 | } |
@@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) | |||
398 | /* Notify and wake up reader process */ | 398 | /* Notify and wake up reader process */ |
399 | if (tun->flags & TUN_FASYNC) | 399 | if (tun->flags & TUN_FASYNC) |
400 | kill_fasync(&tun->fasync, SIGIO, POLL_IN); | 400 | kill_fasync(&tun->fasync, SIGIO, POLL_IN); |
401 | wake_up_interruptible_poll(&tun->socket.wait, POLLIN | | 401 | wake_up_interruptible_poll(&tun->wq.wait, POLLIN | |
402 | POLLRDNORM | POLLRDBAND); | 402 | POLLRDNORM | POLLRDBAND); |
403 | return NETDEV_TX_OK; | 403 | return NETDEV_TX_OK; |
404 | 404 | ||
@@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) | |||
498 | 498 | ||
499 | DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); | 499 | DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); |
500 | 500 | ||
501 | poll_wait(file, &tun->socket.wait, wait); | 501 | poll_wait(file, &tun->wq.wait, wait); |
502 | 502 | ||
503 | if (!skb_queue_empty(&sk->sk_receive_queue)) | 503 | if (!skb_queue_empty(&sk->sk_receive_queue)) |
504 | mask |= POLLIN | POLLRDNORM; | 504 | mask |= POLLIN | POLLRDNORM; |
@@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, | |||
773 | 773 | ||
774 | DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); | 774 | DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); |
775 | 775 | ||
776 | add_wait_queue(&tun->socket.wait, &wait); | 776 | add_wait_queue(&tun->wq.wait, &wait); |
777 | while (len) { | 777 | while (len) { |
778 | current->state = TASK_INTERRUPTIBLE; | 778 | current->state = TASK_INTERRUPTIBLE; |
779 | 779 | ||
@@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, | |||
804 | } | 804 | } |
805 | 805 | ||
806 | current->state = TASK_RUNNING; | 806 | current->state = TASK_RUNNING; |
807 | remove_wait_queue(&tun->socket.wait, &wait); | 807 | remove_wait_queue(&tun->wq.wait, &wait); |
808 | 808 | ||
809 | return ret; | 809 | return ret; |
810 | } | 810 | } |
@@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = { | |||
861 | static void tun_sock_write_space(struct sock *sk) | 861 | static void tun_sock_write_space(struct sock *sk) |
862 | { | 862 | { |
863 | struct tun_struct *tun; | 863 | struct tun_struct *tun; |
864 | wait_queue_head_t *wqueue; | ||
864 | 865 | ||
865 | if (!sock_writeable(sk)) | 866 | if (!sock_writeable(sk)) |
866 | return; | 867 | return; |
@@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk) | |||
868 | if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) | 869 | if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) |
869 | return; | 870 | return; |
870 | 871 | ||
871 | if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) | 872 | wqueue = sk_sleep(sk); |
872 | wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | | 873 | if (wqueue && waitqueue_active(wqueue)) |
874 | wake_up_interruptible_sync_poll(wqueue, POLLOUT | | ||
873 | POLLWRNORM | POLLWRBAND); | 875 | POLLWRNORM | POLLWRBAND); |
874 | 876 | ||
875 | tun = tun_sk(sk)->tun; | 877 | tun = tun_sk(sk)->tun; |
@@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1039 | if (!sk) | 1041 | if (!sk) |
1040 | goto err_free_dev; | 1042 | goto err_free_dev; |
1041 | 1043 | ||
1042 | init_waitqueue_head(&tun->socket.wait); | 1044 | tun->socket.wq = &tun->wq; |
1045 | init_waitqueue_head(&tun->wq.wait); | ||
1043 | tun->socket.ops = &tun_socket_ops; | 1046 | tun->socket.ops = &tun_socket_ops; |
1044 | sock_init_data(&tun->socket, sk); | 1047 | sock_init_data(&tun->socket, sk); |
1045 | sk->sk_write_space = tun_sock_write_space; | 1048 | sk->sk_write_space = tun_sock_write_space; |