diff options
author | Michael S. Tsirkin <mst@redhat.com> | 2010-01-14 01:17:09 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-01-15 04:43:28 -0500 |
commit | 05c2828c72c4eabf62376adfe27bd24797621f62 (patch) | |
tree | 4db94de98ef8e75d5038f6a889dd434f3747c9d3 /drivers/net/tun.c | |
parent | ad72c347e56bf3a0231b9d686e17764157d2961c (diff) |
tun: export underlying socket
A tun device looks similar to a packet socket
in that both pass complete frames to and from userspace.
This patch fills in enough fields in the socket underlying the tun driver
to support sendmsg/recvmsg operations and the message flags
MSG_TRUNC and MSG_DONTWAIT, and exports access to this socket
to modules. Regular read/write behaviour is unchanged.
This way, code using raw sockets to inject packets
into a physical device can support injecting
packets into the host network stack almost without modification.
The first user of this interface will be the vhost virtualization
accelerator.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 101 |
1 files changed, 82 insertions, 19 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2834a01bae24..5adb3d150552 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) | |||
144 | err = 0; | 144 | err = 0; |
145 | tfile->tun = tun; | 145 | tfile->tun = tun; |
146 | tun->tfile = tfile; | 146 | tun->tfile = tfile; |
147 | tun->socket.file = file; | ||
147 | dev_hold(tun->dev); | 148 | dev_hold(tun->dev); |
148 | sock_hold(tun->socket.sk); | 149 | sock_hold(tun->socket.sk); |
149 | atomic_inc(&tfile->count); | 150 | atomic_inc(&tfile->count); |
@@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun) | |||
158 | /* Detach from net device */ | 159 | /* Detach from net device */ |
159 | netif_tx_lock_bh(tun->dev); | 160 | netif_tx_lock_bh(tun->dev); |
160 | tun->tfile = NULL; | 161 | tun->tfile = NULL; |
162 | tun->socket.file = NULL; | ||
161 | netif_tx_unlock_bh(tun->dev); | 163 | netif_tx_unlock_bh(tun->dev); |
162 | 164 | ||
163 | /* Drop read queue */ | 165 | /* Drop read queue */ |
@@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) | |||
387 | /* Notify and wake up reader process */ | 389 | /* Notify and wake up reader process */ |
388 | if (tun->flags & TUN_FASYNC) | 390 | if (tun->flags & TUN_FASYNC) |
389 | kill_fasync(&tun->fasync, SIGIO, POLL_IN); | 391 | kill_fasync(&tun->fasync, SIGIO, POLL_IN); |
390 | wake_up_interruptible(&tun->socket.wait); | 392 | wake_up_interruptible_poll(&tun->socket.wait, POLLIN | |
393 | POLLRDNORM | POLLRDBAND); | ||
391 | return NETDEV_TX_OK; | 394 | return NETDEV_TX_OK; |
392 | 395 | ||
393 | drop: | 396 | drop: |
@@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, | |||
743 | len = min_t(int, skb->len, len); | 746 | len = min_t(int, skb->len, len); |
744 | 747 | ||
745 | skb_copy_datagram_const_iovec(skb, 0, iv, total, len); | 748 | skb_copy_datagram_const_iovec(skb, 0, iv, total, len); |
746 | total += len; | 749 | total += skb->len; |
747 | 750 | ||
748 | tun->dev->stats.tx_packets++; | 751 | tun->dev->stats.tx_packets++; |
749 | tun->dev->stats.tx_bytes += len; | 752 | tun->dev->stats.tx_bytes += len; |
@@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, | |||
751 | return total; | 754 | return total; |
752 | } | 755 | } |
753 | 756 | ||
754 | static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, | 757 | static ssize_t tun_do_read(struct tun_struct *tun, |
755 | unsigned long count, loff_t pos) | 758 | struct kiocb *iocb, const struct iovec *iv, |
759 | ssize_t len, int noblock) | ||
756 | { | 760 | { |
757 | struct file *file = iocb->ki_filp; | ||
758 | struct tun_file *tfile = file->private_data; | ||
759 | struct tun_struct *tun = __tun_get(tfile); | ||
760 | DECLARE_WAITQUEUE(wait, current); | 761 | DECLARE_WAITQUEUE(wait, current); |
761 | struct sk_buff *skb; | 762 | struct sk_buff *skb; |
762 | ssize_t len, ret = 0; | 763 | ssize_t ret = 0; |
763 | |||
764 | if (!tun) | ||
765 | return -EBADFD; | ||
766 | 764 | ||
767 | DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); | 765 | DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); |
768 | 766 | ||
769 | len = iov_length(iv, count); | ||
770 | if (len < 0) { | ||
771 | ret = -EINVAL; | ||
772 | goto out; | ||
773 | } | ||
774 | |||
775 | add_wait_queue(&tun->socket.wait, &wait); | 767 | add_wait_queue(&tun->socket.wait, &wait); |
776 | while (len) { | 768 | while (len) { |
777 | current->state = TASK_INTERRUPTIBLE; | 769 | current->state = TASK_INTERRUPTIBLE; |
778 | 770 | ||
779 | /* Read frames from the queue */ | 771 | /* Read frames from the queue */ |
780 | if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { | 772 | if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { |
781 | if (file->f_flags & O_NONBLOCK) { | 773 | if (noblock) { |
782 | ret = -EAGAIN; | 774 | ret = -EAGAIN; |
783 | break; | 775 | break; |
784 | } | 776 | } |
@@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, | |||
805 | current->state = TASK_RUNNING; | 797 | current->state = TASK_RUNNING; |
806 | remove_wait_queue(&tun->socket.wait, &wait); | 798 | remove_wait_queue(&tun->socket.wait, &wait); |
807 | 799 | ||
800 | return ret; | ||
801 | } | ||
802 | |||
803 | static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, | ||
804 | unsigned long count, loff_t pos) | ||
805 | { | ||
806 | struct file *file = iocb->ki_filp; | ||
807 | struct tun_file *tfile = file->private_data; | ||
808 | struct tun_struct *tun = __tun_get(tfile); | ||
809 | ssize_t len, ret; | ||
810 | |||
811 | if (!tun) | ||
812 | return -EBADFD; | ||
813 | len = iov_length(iv, count); | ||
814 | if (len < 0) { | ||
815 | ret = -EINVAL; | ||
816 | goto out; | ||
817 | } | ||
818 | |||
819 | ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); | ||
820 | ret = min_t(ssize_t, ret, len); | ||
808 | out: | 821 | out: |
809 | tun_put(tun); | 822 | tun_put(tun); |
810 | return ret; | 823 | return ret; |
@@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk) | |||
847 | return; | 860 | return; |
848 | 861 | ||
849 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 862 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
850 | wake_up_interruptible_sync(sk->sk_sleep); | 863 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | |
864 | POLLWRNORM | POLLWRBAND); | ||
851 | 865 | ||
852 | tun = tun_sk(sk)->tun; | 866 | tun = tun_sk(sk)->tun; |
853 | kill_fasync(&tun->fasync, SIGIO, POLL_OUT); | 867 | kill_fasync(&tun->fasync, SIGIO, POLL_OUT); |
@@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk) | |||
858 | free_netdev(tun_sk(sk)->tun->dev); | 872 | free_netdev(tun_sk(sk)->tun->dev); |
859 | } | 873 | } |
860 | 874 | ||
875 | static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, | ||
876 | struct msghdr *m, size_t total_len) | ||
877 | { | ||
878 | struct tun_struct *tun = container_of(sock, struct tun_struct, socket); | ||
879 | return tun_get_user(tun, m->msg_iov, total_len, | ||
880 | m->msg_flags & MSG_DONTWAIT); | ||
881 | } | ||
882 | |||
883 | static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, | ||
884 | struct msghdr *m, size_t total_len, | ||
885 | int flags) | ||
886 | { | ||
887 | struct tun_struct *tun = container_of(sock, struct tun_struct, socket); | ||
888 | int ret; | ||
889 | if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) | ||
890 | return -EINVAL; | ||
891 | ret = tun_do_read(tun, iocb, m->msg_iov, total_len, | ||
892 | flags & MSG_DONTWAIT); | ||
893 | if (ret > total_len) { | ||
894 | m->msg_flags |= MSG_TRUNC; | ||
895 | ret = flags & MSG_TRUNC ? ret : total_len; | ||
896 | } | ||
897 | return ret; | ||
898 | } | ||
899 | |||
900 | /* Ops structure to mimic raw sockets with tun */ | ||
901 | static const struct proto_ops tun_socket_ops = { | ||
902 | .sendmsg = tun_sendmsg, | ||
903 | .recvmsg = tun_recvmsg, | ||
904 | }; | ||
905 | |||
861 | static struct proto tun_proto = { | 906 | static struct proto tun_proto = { |
862 | .name = "tun", | 907 | .name = "tun", |
863 | .owner = THIS_MODULE, | 908 | .owner = THIS_MODULE, |
@@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
986 | goto err_free_dev; | 1031 | goto err_free_dev; |
987 | 1032 | ||
988 | init_waitqueue_head(&tun->socket.wait); | 1033 | init_waitqueue_head(&tun->socket.wait); |
1034 | tun->socket.ops = &tun_socket_ops; | ||
989 | sock_init_data(&tun->socket, sk); | 1035 | sock_init_data(&tun->socket, sk); |
990 | sk->sk_write_space = tun_sock_write_space; | 1036 | sk->sk_write_space = tun_sock_write_space; |
991 | sk->sk_sndbuf = INT_MAX; | 1037 | sk->sk_sndbuf = INT_MAX; |
@@ -1525,6 +1571,23 @@ static void tun_cleanup(void) | |||
1525 | rtnl_link_unregister(&tun_link_ops); | 1571 | rtnl_link_unregister(&tun_link_ops); |
1526 | } | 1572 | } |
1527 | 1573 | ||
1574 | /* Get an underlying socket object from tun file. Returns error unless file is | ||
1575 | * attached to a device. The returned object works like a packet socket, it | ||
1576 | * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for | ||
1577 | * holding a reference to the file for as long as the socket is in use. */ | ||
1578 | struct socket *tun_get_socket(struct file *file) | ||
1579 | { | ||
1580 | struct tun_struct *tun; | ||
1581 | if (file->f_op != &tun_fops) | ||
1582 | return ERR_PTR(-EINVAL); | ||
1583 | tun = tun_get(file); | ||
1584 | if (!tun) | ||
1585 | return ERR_PTR(-EBADFD); | ||
1586 | tun_put(tun); | ||
1587 | return &tun->socket; | ||
1588 | } | ||
1589 | EXPORT_SYMBOL_GPL(tun_get_socket); | ||
1590 | |||
1528 | module_init(tun_init); | 1591 | module_init(tun_init); |
1529 | module_exit(tun_cleanup); | 1592 | module_exit(tun_cleanup); |
1530 | MODULE_DESCRIPTION(DRV_DESCRIPTION); | 1593 | MODULE_DESCRIPTION(DRV_DESCRIPTION); |