diff options
author | Jason Wang <jasowang@redhat.com> | 2012-10-31 15:45:57 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-11-01 11:14:07 -0400 |
commit | 54f968d6efdbf7dec36faa44fc11f01b0e4d1990 (patch) | |
tree | c6cf1f86265af7cb215c0a1e0d3662d30992a578 /drivers/net/tun.c | |
parent | 1e5883382c3287a61aeebc33a301fd50f3a1005b (diff) |
tuntap: move socket to tun_file
Currently, tuntap uses the socket receive queue as its tx queue. To
implement multiple tx queues for tuntap and to allow queues to be added and
removed while a workload is running, the first step is to move the socket-related
structures to tun_file. Multiple fds/sockets can then be attached to
the tuntap device.
This patch removes tun_sock and moves the socket-related structures from tun_sock or
tun_struct to tun_file. The two exceptions are tap_filter and sock_fprog, which are
still kept in tun_struct since they are used to filter packets for the net
device rather than per transmit queue (at least I see no requirement for
per-queue filtering). After these changes, the socket is created and destroyed during file open
and close (instead of device creation and destruction), and the socket structures can
be dereferenced from tun_file instead of from the tun_struct structure
itself.
For persistent devices, since we purge the queue when detaching and do not queue any
packets while no interface is attached, there are no behavior changes before and
after this patch, so the changes are transparent to userspace. To keep
attributes such as sndbuf, the socket filter and the vnet header, they are
re-initialized after a new interface is attached to a persistent device.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 266 |
1 files changed, 145 insertions, 121 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f830b1be4c57..d52ad2438e26 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -109,14 +109,29 @@ struct tap_filter { | |||
109 | unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; | 109 | unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; |
110 | }; | 110 | }; |
111 | 111 | ||
112 | /* A tun_file connects an open character device to a tuntap netdevice. It | ||
113 | * also contains all socket related strctures (except sock_fprog and tap_filter) | ||
114 | * to serve as one transmit queue for tuntap device. The sock_fprog and | ||
115 | * tap_filter were kept in tun_struct since they were used for filtering for the | ||
116 | * netdevice not for a specific queue (at least I didn't see the reqirement for | ||
117 | * this). | ||
118 | */ | ||
112 | struct tun_file { | 119 | struct tun_file { |
120 | struct sock sk; | ||
121 | struct socket socket; | ||
122 | struct socket_wq wq; | ||
113 | atomic_t count; | 123 | atomic_t count; |
114 | struct tun_struct *tun; | 124 | struct tun_struct *tun; |
115 | struct net *net; | 125 | struct net *net; |
126 | struct fasync_struct *fasync; | ||
127 | /* only used for fasnyc */ | ||
128 | unsigned int flags; | ||
116 | }; | 129 | }; |
117 | 130 | ||
118 | struct tun_sock; | 131 | /* Since the socket were moved to tun_file, to preserve the behavior of persist |
119 | 132 | * device, socket fileter, sndbuf and vnet header size were restore when the | |
133 | * file were attached to a persist device. | ||
134 | */ | ||
120 | struct tun_struct { | 135 | struct tun_struct { |
121 | struct tun_file *tfile; | 136 | struct tun_file *tfile; |
122 | unsigned int flags; | 137 | unsigned int flags; |
@@ -127,29 +142,18 @@ struct tun_struct { | |||
127 | netdev_features_t set_features; | 142 | netdev_features_t set_features; |
128 | #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ | 143 | #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ |
129 | NETIF_F_TSO6|NETIF_F_UFO) | 144 | NETIF_F_TSO6|NETIF_F_UFO) |
130 | struct fasync_struct *fasync; | ||
131 | |||
132 | struct tap_filter txflt; | ||
133 | struct socket socket; | ||
134 | struct socket_wq wq; | ||
135 | 145 | ||
136 | int vnet_hdr_sz; | 146 | int vnet_hdr_sz; |
137 | 147 | int sndbuf; | |
148 | struct tap_filter txflt; | ||
149 | struct sock_fprog fprog; | ||
150 | /* protected by rtnl lock */ | ||
151 | bool filter_attached; | ||
138 | #ifdef TUN_DEBUG | 152 | #ifdef TUN_DEBUG |
139 | int debug; | 153 | int debug; |
140 | #endif | 154 | #endif |
141 | }; | 155 | }; |
142 | 156 | ||
143 | struct tun_sock { | ||
144 | struct sock sk; | ||
145 | struct tun_struct *tun; | ||
146 | }; | ||
147 | |||
148 | static inline struct tun_sock *tun_sk(struct sock *sk) | ||
149 | { | ||
150 | return container_of(sk, struct tun_sock, sk); | ||
151 | } | ||
152 | |||
153 | static int tun_attach(struct tun_struct *tun, struct file *file) | 157 | static int tun_attach(struct tun_struct *tun, struct file *file) |
154 | { | 158 | { |
155 | struct tun_file *tfile = file->private_data; | 159 | struct tun_file *tfile = file->private_data; |
@@ -168,12 +172,19 @@ static int tun_attach(struct tun_struct *tun, struct file *file) | |||
168 | goto out; | 172 | goto out; |
169 | 173 | ||
170 | err = 0; | 174 | err = 0; |
175 | |||
176 | /* Re-attach filter when attaching to a persist device */ | ||
177 | if (tun->filter_attached == true) { | ||
178 | err = sk_attach_filter(&tun->fprog, tfile->socket.sk); | ||
179 | if (!err) | ||
180 | goto out; | ||
181 | } | ||
171 | tfile->tun = tun; | 182 | tfile->tun = tun; |
183 | tfile->socket.sk->sk_sndbuf = tun->sndbuf; | ||
172 | tun->tfile = tfile; | 184 | tun->tfile = tfile; |
173 | tun->socket.file = file; | ||
174 | netif_carrier_on(tun->dev); | 185 | netif_carrier_on(tun->dev); |
175 | dev_hold(tun->dev); | 186 | dev_hold(tun->dev); |
176 | sock_hold(tun->socket.sk); | 187 | sock_hold(&tfile->sk); |
177 | atomic_inc(&tfile->count); | 188 | atomic_inc(&tfile->count); |
178 | 189 | ||
179 | out: | 190 | out: |
@@ -183,14 +194,16 @@ out: | |||
183 | 194 | ||
184 | static void __tun_detach(struct tun_struct *tun) | 195 | static void __tun_detach(struct tun_struct *tun) |
185 | { | 196 | { |
197 | struct tun_file *tfile = tun->tfile; | ||
186 | /* Detach from net device */ | 198 | /* Detach from net device */ |
187 | netif_tx_lock_bh(tun->dev); | 199 | netif_tx_lock_bh(tun->dev); |
188 | netif_carrier_off(tun->dev); | 200 | netif_carrier_off(tun->dev); |
189 | tun->tfile = NULL; | 201 | tun->tfile = NULL; |
202 | tfile->tun = NULL; | ||
190 | netif_tx_unlock_bh(tun->dev); | 203 | netif_tx_unlock_bh(tun->dev); |
191 | 204 | ||
192 | /* Drop read queue */ | 205 | /* Drop read queue */ |
193 | skb_queue_purge(&tun->socket.sk->sk_receive_queue); | 206 | skb_queue_purge(&tfile->socket.sk->sk_receive_queue); |
194 | 207 | ||
195 | /* Drop the extra count on the net device */ | 208 | /* Drop the extra count on the net device */ |
196 | dev_put(tun->dev); | 209 | dev_put(tun->dev); |
@@ -349,21 +362,12 @@ static void tun_net_uninit(struct net_device *dev) | |||
349 | /* Inform the methods they need to stop using the dev. | 362 | /* Inform the methods they need to stop using the dev. |
350 | */ | 363 | */ |
351 | if (tfile) { | 364 | if (tfile) { |
352 | wake_up_all(&tun->wq.wait); | 365 | wake_up_all(&tfile->wq.wait); |
353 | if (atomic_dec_and_test(&tfile->count)) | 366 | if (atomic_dec_and_test(&tfile->count)) |
354 | __tun_detach(tun); | 367 | __tun_detach(tun); |
355 | } | 368 | } |
356 | } | 369 | } |
357 | 370 | ||
358 | static void tun_free_netdev(struct net_device *dev) | ||
359 | { | ||
360 | struct tun_struct *tun = netdev_priv(dev); | ||
361 | |||
362 | BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags)); | ||
363 | |||
364 | sk_release_kernel(tun->socket.sk); | ||
365 | } | ||
366 | |||
367 | /* Net device open. */ | 371 | /* Net device open. */ |
368 | static int tun_net_open(struct net_device *dev) | 372 | static int tun_net_open(struct net_device *dev) |
369 | { | 373 | { |
@@ -382,11 +386,12 @@ static int tun_net_close(struct net_device *dev) | |||
382 | static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) | 386 | static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) |
383 | { | 387 | { |
384 | struct tun_struct *tun = netdev_priv(dev); | 388 | struct tun_struct *tun = netdev_priv(dev); |
389 | struct tun_file *tfile = tun->tfile; | ||
385 | 390 | ||
386 | tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); | 391 | tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); |
387 | 392 | ||
388 | /* Drop packet if interface is not attached */ | 393 | /* Drop packet if interface is not attached */ |
389 | if (!tun->tfile) | 394 | if (!tfile) |
390 | goto drop; | 395 | goto drop; |
391 | 396 | ||
392 | /* Drop if the filter does not like it. | 397 | /* Drop if the filter does not like it. |
@@ -395,11 +400,12 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) | |||
395 | if (!check_filter(&tun->txflt, skb)) | 400 | if (!check_filter(&tun->txflt, skb)) |
396 | goto drop; | 401 | goto drop; |
397 | 402 | ||
398 | if (tun->socket.sk->sk_filter && | 403 | if (tfile->socket.sk->sk_filter && |
399 | sk_filter(tun->socket.sk, skb)) | 404 | sk_filter(tfile->socket.sk, skb)) |
400 | goto drop; | 405 | goto drop; |
401 | 406 | ||
402 | if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) { | 407 | if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) |
408 | >= dev->tx_queue_len) { | ||
403 | if (!(tun->flags & TUN_ONE_QUEUE)) { | 409 | if (!(tun->flags & TUN_ONE_QUEUE)) { |
404 | /* Normal queueing mode. */ | 410 | /* Normal queueing mode. */ |
405 | /* Packet scheduler handles dropping of further packets. */ | 411 | /* Packet scheduler handles dropping of further packets. */ |
@@ -422,12 +428,12 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) | |||
422 | skb_orphan(skb); | 428 | skb_orphan(skb); |
423 | 429 | ||
424 | /* Enqueue packet */ | 430 | /* Enqueue packet */ |
425 | skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb); | 431 | skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); |
426 | 432 | ||
427 | /* Notify and wake up reader process */ | 433 | /* Notify and wake up reader process */ |
428 | if (tun->flags & TUN_FASYNC) | 434 | if (tfile->flags & TUN_FASYNC) |
429 | kill_fasync(&tun->fasync, SIGIO, POLL_IN); | 435 | kill_fasync(&tfile->fasync, SIGIO, POLL_IN); |
430 | wake_up_interruptible_poll(&tun->wq.wait, POLLIN | | 436 | wake_up_interruptible_poll(&tfile->wq.wait, POLLIN | |
431 | POLLRDNORM | POLLRDBAND); | 437 | POLLRDNORM | POLLRDBAND); |
432 | return NETDEV_TX_OK; | 438 | return NETDEV_TX_OK; |
433 | 439 | ||
@@ -555,11 +561,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) | |||
555 | if (!tun) | 561 | if (!tun) |
556 | return POLLERR; | 562 | return POLLERR; |
557 | 563 | ||
558 | sk = tun->socket.sk; | 564 | sk = tfile->socket.sk; |
559 | 565 | ||
560 | tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); | 566 | tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); |
561 | 567 | ||
562 | poll_wait(file, &tun->wq.wait, wait); | 568 | poll_wait(file, &tfile->wq.wait, wait); |
563 | 569 | ||
564 | if (!skb_queue_empty(&sk->sk_receive_queue)) | 570 | if (!skb_queue_empty(&sk->sk_receive_queue)) |
565 | mask |= POLLIN | POLLRDNORM; | 571 | mask |= POLLIN | POLLRDNORM; |
@@ -578,11 +584,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) | |||
578 | 584 | ||
579 | /* prepad is the amount to reserve at front. len is length after that. | 585 | /* prepad is the amount to reserve at front. len is length after that. |
580 | * linear is a hint as to how much to copy (usually headers). */ | 586 | * linear is a hint as to how much to copy (usually headers). */ |
581 | static struct sk_buff *tun_alloc_skb(struct tun_struct *tun, | 587 | static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, |
582 | size_t prepad, size_t len, | 588 | size_t prepad, size_t len, |
583 | size_t linear, int noblock) | 589 | size_t linear, int noblock) |
584 | { | 590 | { |
585 | struct sock *sk = tun->socket.sk; | 591 | struct sock *sk = tfile->socket.sk; |
586 | struct sk_buff *skb; | 592 | struct sk_buff *skb; |
587 | int err; | 593 | int err; |
588 | 594 | ||
@@ -682,9 +688,9 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
682 | } | 688 | } |
683 | 689 | ||
684 | /* Get packet from user space buffer */ | 690 | /* Get packet from user space buffer */ |
685 | static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, | 691 | static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, |
686 | const struct iovec *iv, size_t total_len, | 692 | void *msg_control, const struct iovec *iv, |
687 | size_t count, int noblock) | 693 | size_t total_len, size_t count, int noblock) |
688 | { | 694 | { |
689 | struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; | 695 | struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; |
690 | struct sk_buff *skb; | 696 | struct sk_buff *skb; |
@@ -754,7 +760,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, | |||
754 | } else | 760 | } else |
755 | copylen = len; | 761 | copylen = len; |
756 | 762 | ||
757 | skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock); | 763 | skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); |
758 | if (IS_ERR(skb)) { | 764 | if (IS_ERR(skb)) { |
759 | if (PTR_ERR(skb) != -EAGAIN) | 765 | if (PTR_ERR(skb) != -EAGAIN) |
760 | tun->dev->stats.rx_dropped++; | 766 | tun->dev->stats.rx_dropped++; |
@@ -859,6 +865,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, | |||
859 | { | 865 | { |
860 | struct file *file = iocb->ki_filp; | 866 | struct file *file = iocb->ki_filp; |
861 | struct tun_struct *tun = tun_get(file); | 867 | struct tun_struct *tun = tun_get(file); |
868 | struct tun_file *tfile = file->private_data; | ||
862 | ssize_t result; | 869 | ssize_t result; |
863 | 870 | ||
864 | if (!tun) | 871 | if (!tun) |
@@ -866,8 +873,8 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, | |||
866 | 873 | ||
867 | tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count); | 874 | tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count); |
868 | 875 | ||
869 | result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count, | 876 | result = tun_get_user(tun, tfile, NULL, iv, iov_length(iv, count), |
870 | file->f_flags & O_NONBLOCK); | 877 | count, file->f_flags & O_NONBLOCK); |
871 | 878 | ||
872 | tun_put(tun); | 879 | tun_put(tun); |
873 | return result; | 880 | return result; |
@@ -875,6 +882,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, | |||
875 | 882 | ||
876 | /* Put packet to the user space buffer */ | 883 | /* Put packet to the user space buffer */ |
877 | static ssize_t tun_put_user(struct tun_struct *tun, | 884 | static ssize_t tun_put_user(struct tun_struct *tun, |
885 | struct tun_file *tfile, | ||
878 | struct sk_buff *skb, | 886 | struct sk_buff *skb, |
879 | const struct iovec *iv, int len) | 887 | const struct iovec *iv, int len) |
880 | { | 888 | { |
@@ -954,7 +962,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, | |||
954 | return total; | 962 | return total; |
955 | } | 963 | } |
956 | 964 | ||
957 | static ssize_t tun_do_read(struct tun_struct *tun, | 965 | static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, |
958 | struct kiocb *iocb, const struct iovec *iv, | 966 | struct kiocb *iocb, const struct iovec *iv, |
959 | ssize_t len, int noblock) | 967 | ssize_t len, int noblock) |
960 | { | 968 | { |
@@ -965,12 +973,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, | |||
965 | tun_debug(KERN_INFO, tun, "tun_chr_read\n"); | 973 | tun_debug(KERN_INFO, tun, "tun_chr_read\n"); |
966 | 974 | ||
967 | if (unlikely(!noblock)) | 975 | if (unlikely(!noblock)) |
968 | add_wait_queue(&tun->wq.wait, &wait); | 976 | add_wait_queue(&tfile->wq.wait, &wait); |
969 | while (len) { | 977 | while (len) { |
970 | current->state = TASK_INTERRUPTIBLE; | 978 | current->state = TASK_INTERRUPTIBLE; |
971 | 979 | ||
972 | /* Read frames from the queue */ | 980 | /* Read frames from the queue */ |
973 | if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { | 981 | if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) { |
974 | if (noblock) { | 982 | if (noblock) { |
975 | ret = -EAGAIN; | 983 | ret = -EAGAIN; |
976 | break; | 984 | break; |
@@ -990,14 +998,14 @@ static ssize_t tun_do_read(struct tun_struct *tun, | |||
990 | } | 998 | } |
991 | netif_wake_queue(tun->dev); | 999 | netif_wake_queue(tun->dev); |
992 | 1000 | ||
993 | ret = tun_put_user(tun, skb, iv, len); | 1001 | ret = tun_put_user(tun, tfile, skb, iv, len); |
994 | kfree_skb(skb); | 1002 | kfree_skb(skb); |
995 | break; | 1003 | break; |
996 | } | 1004 | } |
997 | 1005 | ||
998 | current->state = TASK_RUNNING; | 1006 | current->state = TASK_RUNNING; |
999 | if (unlikely(!noblock)) | 1007 | if (unlikely(!noblock)) |
1000 | remove_wait_queue(&tun->wq.wait, &wait); | 1008 | remove_wait_queue(&tfile->wq.wait, &wait); |
1001 | 1009 | ||
1002 | return ret; | 1010 | return ret; |
1003 | } | 1011 | } |
@@ -1018,7 +1026,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, | |||
1018 | goto out; | 1026 | goto out; |
1019 | } | 1027 | } |
1020 | 1028 | ||
1021 | ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); | 1029 | ret = tun_do_read(tun, tfile, iocb, iv, len, |
1030 | file->f_flags & O_NONBLOCK); | ||
1022 | ret = min_t(ssize_t, ret, len); | 1031 | ret = min_t(ssize_t, ret, len); |
1023 | out: | 1032 | out: |
1024 | tun_put(tun); | 1033 | tun_put(tun); |
@@ -1033,7 +1042,7 @@ static void tun_setup(struct net_device *dev) | |||
1033 | tun->group = INVALID_GID; | 1042 | tun->group = INVALID_GID; |
1034 | 1043 | ||
1035 | dev->ethtool_ops = &tun_ethtool_ops; | 1044 | dev->ethtool_ops = &tun_ethtool_ops; |
1036 | dev->destructor = tun_free_netdev; | 1045 | dev->destructor = free_netdev; |
1037 | } | 1046 | } |
1038 | 1047 | ||
1039 | /* Trivial set of netlink ops to allow deleting tun or tap | 1048 | /* Trivial set of netlink ops to allow deleting tun or tap |
@@ -1053,7 +1062,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = { | |||
1053 | 1062 | ||
1054 | static void tun_sock_write_space(struct sock *sk) | 1063 | static void tun_sock_write_space(struct sock *sk) |
1055 | { | 1064 | { |
1056 | struct tun_struct *tun; | 1065 | struct tun_file *tfile; |
1057 | wait_queue_head_t *wqueue; | 1066 | wait_queue_head_t *wqueue; |
1058 | 1067 | ||
1059 | if (!sock_writeable(sk)) | 1068 | if (!sock_writeable(sk)) |
@@ -1067,37 +1076,47 @@ static void tun_sock_write_space(struct sock *sk) | |||
1067 | wake_up_interruptible_sync_poll(wqueue, POLLOUT | | 1076 | wake_up_interruptible_sync_poll(wqueue, POLLOUT | |
1068 | POLLWRNORM | POLLWRBAND); | 1077 | POLLWRNORM | POLLWRBAND); |
1069 | 1078 | ||
1070 | tun = tun_sk(sk)->tun; | 1079 | tfile = container_of(sk, struct tun_file, sk); |
1071 | kill_fasync(&tun->fasync, SIGIO, POLL_OUT); | 1080 | kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); |
1072 | } | ||
1073 | |||
1074 | static void tun_sock_destruct(struct sock *sk) | ||
1075 | { | ||
1076 | free_netdev(tun_sk(sk)->tun->dev); | ||
1077 | } | 1081 | } |
1078 | 1082 | ||
1079 | static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, | 1083 | static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, |
1080 | struct msghdr *m, size_t total_len) | 1084 | struct msghdr *m, size_t total_len) |
1081 | { | 1085 | { |
1082 | struct tun_struct *tun = container_of(sock, struct tun_struct, socket); | 1086 | int ret; |
1083 | return tun_get_user(tun, m->msg_control, m->msg_iov, total_len, | 1087 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); |
1084 | m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); | 1088 | struct tun_struct *tun = __tun_get(tfile); |
1089 | |||
1090 | if (!tun) | ||
1091 | return -EBADFD; | ||
1092 | |||
1093 | ret = tun_get_user(tun, tfile, m->msg_control, m->msg_iov, total_len, | ||
1094 | m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); | ||
1095 | tun_put(tun); | ||
1096 | return ret; | ||
1085 | } | 1097 | } |
1086 | 1098 | ||
1099 | |||
1087 | static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, | 1100 | static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, |
1088 | struct msghdr *m, size_t total_len, | 1101 | struct msghdr *m, size_t total_len, |
1089 | int flags) | 1102 | int flags) |
1090 | { | 1103 | { |
1091 | struct tun_struct *tun = container_of(sock, struct tun_struct, socket); | 1104 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); |
1105 | struct tun_struct *tun = __tun_get(tfile); | ||
1092 | int ret; | 1106 | int ret; |
1107 | |||
1108 | if (!tun) | ||
1109 | return -EBADFD; | ||
1110 | |||
1093 | if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) | 1111 | if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) |
1094 | return -EINVAL; | 1112 | return -EINVAL; |
1095 | ret = tun_do_read(tun, iocb, m->msg_iov, total_len, | 1113 | ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len, |
1096 | flags & MSG_DONTWAIT); | 1114 | flags & MSG_DONTWAIT); |
1097 | if (ret > total_len) { | 1115 | if (ret > total_len) { |
1098 | m->msg_flags |= MSG_TRUNC; | 1116 | m->msg_flags |= MSG_TRUNC; |
1099 | ret = flags & MSG_TRUNC ? ret : total_len; | 1117 | ret = flags & MSG_TRUNC ? ret : total_len; |
1100 | } | 1118 | } |
1119 | tun_put(tun); | ||
1101 | return ret; | 1120 | return ret; |
1102 | } | 1121 | } |
1103 | 1122 | ||
@@ -1118,7 +1137,7 @@ static const struct proto_ops tun_socket_ops = { | |||
1118 | static struct proto tun_proto = { | 1137 | static struct proto tun_proto = { |
1119 | .name = "tun", | 1138 | .name = "tun", |
1120 | .owner = THIS_MODULE, | 1139 | .owner = THIS_MODULE, |
1121 | .obj_size = sizeof(struct tun_sock), | 1140 | .obj_size = sizeof(struct tun_file), |
1122 | }; | 1141 | }; |
1123 | 1142 | ||
1124 | static int tun_flags(struct tun_struct *tun) | 1143 | static int tun_flags(struct tun_struct *tun) |
@@ -1175,8 +1194,8 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL); | |||
1175 | 1194 | ||
1176 | static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | 1195 | static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) |
1177 | { | 1196 | { |
1178 | struct sock *sk; | ||
1179 | struct tun_struct *tun; | 1197 | struct tun_struct *tun; |
1198 | struct tun_file *tfile = file->private_data; | ||
1180 | struct net_device *dev; | 1199 | struct net_device *dev; |
1181 | int err; | 1200 | int err; |
1182 | 1201 | ||
@@ -1197,7 +1216,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1197 | (gid_valid(tun->group) && !in_egroup_p(tun->group))) && | 1216 | (gid_valid(tun->group) && !in_egroup_p(tun->group))) && |
1198 | !capable(CAP_NET_ADMIN)) | 1217 | !capable(CAP_NET_ADMIN)) |
1199 | return -EPERM; | 1218 | return -EPERM; |
1200 | err = security_tun_dev_attach(tun->socket.sk); | 1219 | err = security_tun_dev_attach(tfile->socket.sk); |
1201 | if (err < 0) | 1220 | if (err < 0) |
1202 | return err; | 1221 | return err; |
1203 | 1222 | ||
@@ -1243,25 +1262,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1243 | tun->flags = flags; | 1262 | tun->flags = flags; |
1244 | tun->txflt.count = 0; | 1263 | tun->txflt.count = 0; |
1245 | tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); | 1264 | tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); |
1246 | set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags); | ||
1247 | |||
1248 | err = -ENOMEM; | ||
1249 | sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto); | ||
1250 | if (!sk) | ||
1251 | goto err_free_dev; | ||
1252 | 1265 | ||
1253 | sk_change_net(sk, net); | 1266 | tun->filter_attached = false; |
1254 | tun->socket.wq = &tun->wq; | 1267 | tun->sndbuf = tfile->socket.sk->sk_sndbuf; |
1255 | init_waitqueue_head(&tun->wq.wait); | ||
1256 | tun->socket.ops = &tun_socket_ops; | ||
1257 | sock_init_data(&tun->socket, sk); | ||
1258 | sk->sk_write_space = tun_sock_write_space; | ||
1259 | sk->sk_sndbuf = INT_MAX; | ||
1260 | sock_set_flag(sk, SOCK_ZEROCOPY); | ||
1261 | 1268 | ||
1262 | tun_sk(sk)->tun = tun; | 1269 | security_tun_dev_post_create(&tfile->sk); |
1263 | |||
1264 | security_tun_dev_post_create(sk); | ||
1265 | 1270 | ||
1266 | tun_net_init(dev); | 1271 | tun_net_init(dev); |
1267 | 1272 | ||
@@ -1271,15 +1276,13 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1271 | 1276 | ||
1272 | err = register_netdevice(tun->dev); | 1277 | err = register_netdevice(tun->dev); |
1273 | if (err < 0) | 1278 | if (err < 0) |
1274 | goto err_free_sk; | 1279 | goto err_free_dev; |
1275 | 1280 | ||
1276 | if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) || | 1281 | if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) || |
1277 | device_create_file(&tun->dev->dev, &dev_attr_owner) || | 1282 | device_create_file(&tun->dev->dev, &dev_attr_owner) || |
1278 | device_create_file(&tun->dev->dev, &dev_attr_group)) | 1283 | device_create_file(&tun->dev->dev, &dev_attr_group)) |
1279 | pr_err("Failed to create tun sysfs files\n"); | 1284 | pr_err("Failed to create tun sysfs files\n"); |
1280 | 1285 | ||
1281 | sk->sk_destruct = tun_sock_destruct; | ||
1282 | |||
1283 | err = tun_attach(tun, file); | 1286 | err = tun_attach(tun, file); |
1284 | if (err < 0) | 1287 | if (err < 0) |
1285 | goto failed; | 1288 | goto failed; |
@@ -1311,8 +1314,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1311 | strcpy(ifr->ifr_name, tun->dev->name); | 1314 | strcpy(ifr->ifr_name, tun->dev->name); |
1312 | return 0; | 1315 | return 0; |
1313 | 1316 | ||
1314 | err_free_sk: | ||
1315 | tun_free_netdev(dev); | ||
1316 | err_free_dev: | 1317 | err_free_dev: |
1317 | free_netdev(dev); | 1318 | free_netdev(dev); |
1318 | failed: | 1319 | failed: |
@@ -1376,7 +1377,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1376 | struct tun_file *tfile = file->private_data; | 1377 | struct tun_file *tfile = file->private_data; |
1377 | struct tun_struct *tun; | 1378 | struct tun_struct *tun; |
1378 | void __user* argp = (void __user*)arg; | 1379 | void __user* argp = (void __user*)arg; |
1379 | struct sock_fprog fprog; | ||
1380 | struct ifreq ifr; | 1380 | struct ifreq ifr; |
1381 | kuid_t owner; | 1381 | kuid_t owner; |
1382 | kgid_t group; | 1382 | kgid_t group; |
@@ -1441,11 +1441,16 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1441 | break; | 1441 | break; |
1442 | 1442 | ||
1443 | case TUNSETPERSIST: | 1443 | case TUNSETPERSIST: |
1444 | /* Disable/Enable persist mode */ | 1444 | /* Disable/Enable persist mode. Keep an extra reference to the |
1445 | if (arg) | 1445 | * module to prevent the module being unprobed. |
1446 | */ | ||
1447 | if (arg) { | ||
1446 | tun->flags |= TUN_PERSIST; | 1448 | tun->flags |= TUN_PERSIST; |
1447 | else | 1449 | __module_get(THIS_MODULE); |
1450 | } else { | ||
1448 | tun->flags &= ~TUN_PERSIST; | 1451 | tun->flags &= ~TUN_PERSIST; |
1452 | module_put(THIS_MODULE); | ||
1453 | } | ||
1449 | 1454 | ||
1450 | tun_debug(KERN_INFO, tun, "persist %s\n", | 1455 | tun_debug(KERN_INFO, tun, "persist %s\n", |
1451 | arg ? "enabled" : "disabled"); | 1456 | arg ? "enabled" : "disabled"); |
@@ -1523,7 +1528,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1523 | break; | 1528 | break; |
1524 | 1529 | ||
1525 | case TUNGETSNDBUF: | 1530 | case TUNGETSNDBUF: |
1526 | sndbuf = tun->socket.sk->sk_sndbuf; | 1531 | sndbuf = tfile->socket.sk->sk_sndbuf; |
1527 | if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) | 1532 | if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) |
1528 | ret = -EFAULT; | 1533 | ret = -EFAULT; |
1529 | break; | 1534 | break; |
@@ -1534,7 +1539,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1534 | break; | 1539 | break; |
1535 | } | 1540 | } |
1536 | 1541 | ||
1537 | tun->socket.sk->sk_sndbuf = sndbuf; | 1542 | tun->sndbuf = tfile->socket.sk->sk_sndbuf = sndbuf; |
1538 | break; | 1543 | break; |
1539 | 1544 | ||
1540 | case TUNGETVNETHDRSZ: | 1545 | case TUNGETVNETHDRSZ: |
@@ -1562,10 +1567,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1562 | if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) | 1567 | if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) |
1563 | break; | 1568 | break; |
1564 | ret = -EFAULT; | 1569 | ret = -EFAULT; |
1565 | if (copy_from_user(&fprog, argp, sizeof(fprog))) | 1570 | if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) |
1566 | break; | 1571 | break; |
1567 | 1572 | ||
1568 | ret = sk_attach_filter(&fprog, tun->socket.sk); | 1573 | ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); |
1574 | if (!ret) | ||
1575 | tun->filter_attached = true; | ||
1569 | break; | 1576 | break; |
1570 | 1577 | ||
1571 | case TUNDETACHFILTER: | 1578 | case TUNDETACHFILTER: |
@@ -1573,7 +1580,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1573 | ret = -EINVAL; | 1580 | ret = -EINVAL; |
1574 | if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) | 1581 | if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) |
1575 | break; | 1582 | break; |
1576 | ret = sk_detach_filter(tun->socket.sk); | 1583 | ret = sk_detach_filter(tfile->socket.sk); |
1584 | if (!ret) | ||
1585 | tun->filter_attached = false; | ||
1577 | break; | 1586 | break; |
1578 | 1587 | ||
1579 | default: | 1588 | default: |
@@ -1625,27 +1634,21 @@ static long tun_chr_compat_ioctl(struct file *file, | |||
1625 | 1634 | ||
1626 | static int tun_chr_fasync(int fd, struct file *file, int on) | 1635 | static int tun_chr_fasync(int fd, struct file *file, int on) |
1627 | { | 1636 | { |
1628 | struct tun_struct *tun = tun_get(file); | 1637 | struct tun_file *tfile = file->private_data; |
1629 | int ret; | 1638 | int ret; |
1630 | 1639 | ||
1631 | if (!tun) | 1640 | if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) |
1632 | return -EBADFD; | ||
1633 | |||
1634 | tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on); | ||
1635 | |||
1636 | if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0) | ||
1637 | goto out; | 1641 | goto out; |
1638 | 1642 | ||
1639 | if (on) { | 1643 | if (on) { |
1640 | ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); | 1644 | ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); |
1641 | if (ret) | 1645 | if (ret) |
1642 | goto out; | 1646 | goto out; |
1643 | tun->flags |= TUN_FASYNC; | 1647 | tfile->flags |= TUN_FASYNC; |
1644 | } else | 1648 | } else |
1645 | tun->flags &= ~TUN_FASYNC; | 1649 | tfile->flags &= ~TUN_FASYNC; |
1646 | ret = 0; | 1650 | ret = 0; |
1647 | out: | 1651 | out: |
1648 | tun_put(tun); | ||
1649 | return ret; | 1652 | return ret; |
1650 | } | 1653 | } |
1651 | 1654 | ||
@@ -1655,13 +1658,30 @@ static int tun_chr_open(struct inode *inode, struct file * file) | |||
1655 | 1658 | ||
1656 | DBG1(KERN_INFO, "tunX: tun_chr_open\n"); | 1659 | DBG1(KERN_INFO, "tunX: tun_chr_open\n"); |
1657 | 1660 | ||
1658 | tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); | 1661 | tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, |
1662 | &tun_proto); | ||
1659 | if (!tfile) | 1663 | if (!tfile) |
1660 | return -ENOMEM; | 1664 | return -ENOMEM; |
1661 | atomic_set(&tfile->count, 0); | 1665 | atomic_set(&tfile->count, 0); |
1662 | tfile->tun = NULL; | 1666 | tfile->tun = NULL; |
1663 | tfile->net = get_net(current->nsproxy->net_ns); | 1667 | tfile->net = get_net(current->nsproxy->net_ns); |
1668 | tfile->flags = 0; | ||
1669 | |||
1670 | rcu_assign_pointer(tfile->socket.wq, &tfile->wq); | ||
1671 | init_waitqueue_head(&tfile->wq.wait); | ||
1672 | |||
1673 | tfile->socket.file = file; | ||
1674 | tfile->socket.ops = &tun_socket_ops; | ||
1675 | |||
1676 | sock_init_data(&tfile->socket, &tfile->sk); | ||
1677 | sk_change_net(&tfile->sk, tfile->net); | ||
1678 | |||
1679 | tfile->sk.sk_write_space = tun_sock_write_space; | ||
1680 | tfile->sk.sk_sndbuf = INT_MAX; | ||
1681 | |||
1664 | file->private_data = tfile; | 1682 | file->private_data = tfile; |
1683 | set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); | ||
1684 | |||
1665 | return 0; | 1685 | return 0; |
1666 | } | 1686 | } |
1667 | 1687 | ||
@@ -1669,6 +1689,7 @@ static int tun_chr_close(struct inode *inode, struct file *file) | |||
1669 | { | 1689 | { |
1670 | struct tun_file *tfile = file->private_data; | 1690 | struct tun_file *tfile = file->private_data; |
1671 | struct tun_struct *tun; | 1691 | struct tun_struct *tun; |
1692 | struct net *net = tfile->net; | ||
1672 | 1693 | ||
1673 | tun = __tun_get(tfile); | 1694 | tun = __tun_get(tfile); |
1674 | if (tun) { | 1695 | if (tun) { |
@@ -1685,14 +1706,16 @@ static int tun_chr_close(struct inode *inode, struct file *file) | |||
1685 | unregister_netdevice(dev); | 1706 | unregister_netdevice(dev); |
1686 | rtnl_unlock(); | 1707 | rtnl_unlock(); |
1687 | } | 1708 | } |
1688 | } | ||
1689 | 1709 | ||
1690 | tun = tfile->tun; | 1710 | /* drop the reference that netdevice holds */ |
1691 | if (tun) | 1711 | sock_put(&tfile->sk); |
1692 | sock_put(tun->socket.sk); | 1712 | } |
1693 | 1713 | ||
1694 | put_net(tfile->net); | 1714 | /* drop the reference that file holds */ |
1695 | kfree(tfile); | 1715 | BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, |
1716 | &tfile->socket.flags)); | ||
1717 | sk_release_kernel(&tfile->sk); | ||
1718 | put_net(net); | ||
1696 | 1719 | ||
1697 | return 0; | 1720 | return 0; |
1698 | } | 1721 | } |
@@ -1820,13 +1843,14 @@ static void tun_cleanup(void) | |||
1820 | struct socket *tun_get_socket(struct file *file) | 1843 | struct socket *tun_get_socket(struct file *file) |
1821 | { | 1844 | { |
1822 | struct tun_struct *tun; | 1845 | struct tun_struct *tun; |
1846 | struct tun_file *tfile = file->private_data; | ||
1823 | if (file->f_op != &tun_fops) | 1847 | if (file->f_op != &tun_fops) |
1824 | return ERR_PTR(-EINVAL); | 1848 | return ERR_PTR(-EINVAL); |
1825 | tun = tun_get(file); | 1849 | tun = tun_get(file); |
1826 | if (!tun) | 1850 | if (!tun) |
1827 | return ERR_PTR(-EBADFD); | 1851 | return ERR_PTR(-EBADFD); |
1828 | tun_put(tun); | 1852 | tun_put(tun); |
1829 | return &tun->socket; | 1853 | return &tfile->socket; |
1830 | } | 1854 | } |
1831 | EXPORT_SYMBOL_GPL(tun_get_socket); | 1855 | EXPORT_SYMBOL_GPL(tun_get_socket); |
1832 | 1856 | ||