aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
authorJason Wang <jasowang@redhat.com>2012-10-31 15:45:57 -0400
committerDavid S. Miller <davem@davemloft.net>2012-11-01 11:14:07 -0400
commit54f968d6efdbf7dec36faa44fc11f01b0e4d1990 (patch)
treec6cf1f86265af7cb215c0a1e0d3662d30992a578 /drivers/net/tun.c
parent1e5883382c3287a61aeebc33a301fd50f3a1005b (diff)
tuntap: move socket to tun_file
Current tuntap makes use of the socket receive queue as its tx queue. To implement multiple tx queues for tuntap and enable adding and removing queues during workload, the first step is to move the socket related structures to tun_file. Then we can let multiple fds/sockets be attached to the tuntap. This patch removes tun_sock and moves the socket related structures from tun_sock or tun_struct to tun_file. The two exceptions are tap_filter and sock_fprog: they are still kept in tun_struct since they are used to filter packets for the net device rather than per transmit queue (at least I see no requirement for per-queue filtering). After these changes, sockets are created and destroyed during file open and close (instead of device creation and destruction), and the socket structures are dereferenced from tun_file instead of from the tun_struct structure itself. For a persistent device, since we purge the queue when detaching and do not queue any packets while no interface is attached, there is no behavior change before and after this patch, so the changes are transparent to userspace. To keep attributes such as sndbuf, socket filter and vnet header, they are re-initialized after a new interface is attached to a persistent device. Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--drivers/net/tun.c266
1 files changed, 145 insertions, 121 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f830b1be4c57..d52ad2438e26 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,14 +109,29 @@ struct tap_filter {
109 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; 109 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
110}; 110};
111 111
112/* A tun_file connects an open character device to a tuntap netdevice. It
113 * also contains all socket related structures (except sock_fprog and tap_filter)
114 * to serve as one transmit queue for tuntap device. The sock_fprog and
115 * tap_filter were kept in tun_struct since they were used for filtering for the
116 * netdevice not for a specific queue (at least I didn't see the requirement for
117 * this).
118 */
112struct tun_file { 119struct tun_file {
120 struct sock sk;
121 struct socket socket;
122 struct socket_wq wq;
113 atomic_t count; 123 atomic_t count;
114 struct tun_struct *tun; 124 struct tun_struct *tun;
115 struct net *net; 125 struct net *net;
126 struct fasync_struct *fasync;
127 /* only used for fasync */
128 unsigned int flags;
116}; 129};
117 130
118struct tun_sock; 131/* Since the socket were moved to tun_file, to preserve the behavior of persist
119 132 * device, socket filter, sndbuf and vnet header size were restored when the
133 * file were attached to a persist device.
134 */
120struct tun_struct { 135struct tun_struct {
121 struct tun_file *tfile; 136 struct tun_file *tfile;
122 unsigned int flags; 137 unsigned int flags;
@@ -127,29 +142,18 @@ struct tun_struct {
127 netdev_features_t set_features; 142 netdev_features_t set_features;
128#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ 143#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
129 NETIF_F_TSO6|NETIF_F_UFO) 144 NETIF_F_TSO6|NETIF_F_UFO)
130 struct fasync_struct *fasync;
131
132 struct tap_filter txflt;
133 struct socket socket;
134 struct socket_wq wq;
135 145
136 int vnet_hdr_sz; 146 int vnet_hdr_sz;
137 147 int sndbuf;
148 struct tap_filter txflt;
149 struct sock_fprog fprog;
150 /* protected by rtnl lock */
151 bool filter_attached;
138#ifdef TUN_DEBUG 152#ifdef TUN_DEBUG
139 int debug; 153 int debug;
140#endif 154#endif
141}; 155};
142 156
143struct tun_sock {
144 struct sock sk;
145 struct tun_struct *tun;
146};
147
148static inline struct tun_sock *tun_sk(struct sock *sk)
149{
150 return container_of(sk, struct tun_sock, sk);
151}
152
153static int tun_attach(struct tun_struct *tun, struct file *file) 157static int tun_attach(struct tun_struct *tun, struct file *file)
154{ 158{
155 struct tun_file *tfile = file->private_data; 159 struct tun_file *tfile = file->private_data;
@@ -168,12 +172,19 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
168 goto out; 172 goto out;
169 173
170 err = 0; 174 err = 0;
175
176 /* Re-attach filter when attaching to a persist device */
177 if (tun->filter_attached == true) {
178 err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
179 if (!err)
180 goto out;
181 }
171 tfile->tun = tun; 182 tfile->tun = tun;
183 tfile->socket.sk->sk_sndbuf = tun->sndbuf;
172 tun->tfile = tfile; 184 tun->tfile = tfile;
173 tun->socket.file = file;
174 netif_carrier_on(tun->dev); 185 netif_carrier_on(tun->dev);
175 dev_hold(tun->dev); 186 dev_hold(tun->dev);
176 sock_hold(tun->socket.sk); 187 sock_hold(&tfile->sk);
177 atomic_inc(&tfile->count); 188 atomic_inc(&tfile->count);
178 189
179out: 190out:
@@ -183,14 +194,16 @@ out:
183 194
184static void __tun_detach(struct tun_struct *tun) 195static void __tun_detach(struct tun_struct *tun)
185{ 196{
197 struct tun_file *tfile = tun->tfile;
186 /* Detach from net device */ 198 /* Detach from net device */
187 netif_tx_lock_bh(tun->dev); 199 netif_tx_lock_bh(tun->dev);
188 netif_carrier_off(tun->dev); 200 netif_carrier_off(tun->dev);
189 tun->tfile = NULL; 201 tun->tfile = NULL;
202 tfile->tun = NULL;
190 netif_tx_unlock_bh(tun->dev); 203 netif_tx_unlock_bh(tun->dev);
191 204
192 /* Drop read queue */ 205 /* Drop read queue */
193 skb_queue_purge(&tun->socket.sk->sk_receive_queue); 206 skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
194 207
195 /* Drop the extra count on the net device */ 208 /* Drop the extra count on the net device */
196 dev_put(tun->dev); 209 dev_put(tun->dev);
@@ -349,21 +362,12 @@ static void tun_net_uninit(struct net_device *dev)
349 /* Inform the methods they need to stop using the dev. 362 /* Inform the methods they need to stop using the dev.
350 */ 363 */
351 if (tfile) { 364 if (tfile) {
352 wake_up_all(&tun->wq.wait); 365 wake_up_all(&tfile->wq.wait);
353 if (atomic_dec_and_test(&tfile->count)) 366 if (atomic_dec_and_test(&tfile->count))
354 __tun_detach(tun); 367 __tun_detach(tun);
355 } 368 }
356} 369}
357 370
358static void tun_free_netdev(struct net_device *dev)
359{
360 struct tun_struct *tun = netdev_priv(dev);
361
362 BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags));
363
364 sk_release_kernel(tun->socket.sk);
365}
366
367/* Net device open. */ 371/* Net device open. */
368static int tun_net_open(struct net_device *dev) 372static int tun_net_open(struct net_device *dev)
369{ 373{
@@ -382,11 +386,12 @@ static int tun_net_close(struct net_device *dev)
382static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) 386static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
383{ 387{
384 struct tun_struct *tun = netdev_priv(dev); 388 struct tun_struct *tun = netdev_priv(dev);
389 struct tun_file *tfile = tun->tfile;
385 390
386 tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); 391 tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
387 392
388 /* Drop packet if interface is not attached */ 393 /* Drop packet if interface is not attached */
389 if (!tun->tfile) 394 if (!tfile)
390 goto drop; 395 goto drop;
391 396
392 /* Drop if the filter does not like it. 397 /* Drop if the filter does not like it.
@@ -395,11 +400,12 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
395 if (!check_filter(&tun->txflt, skb)) 400 if (!check_filter(&tun->txflt, skb))
396 goto drop; 401 goto drop;
397 402
398 if (tun->socket.sk->sk_filter && 403 if (tfile->socket.sk->sk_filter &&
399 sk_filter(tun->socket.sk, skb)) 404 sk_filter(tfile->socket.sk, skb))
400 goto drop; 405 goto drop;
401 406
402 if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) { 407 if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
408 >= dev->tx_queue_len) {
403 if (!(tun->flags & TUN_ONE_QUEUE)) { 409 if (!(tun->flags & TUN_ONE_QUEUE)) {
404 /* Normal queueing mode. */ 410 /* Normal queueing mode. */
405 /* Packet scheduler handles dropping of further packets. */ 411 /* Packet scheduler handles dropping of further packets. */
@@ -422,12 +428,12 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
422 skb_orphan(skb); 428 skb_orphan(skb);
423 429
424 /* Enqueue packet */ 430 /* Enqueue packet */
425 skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb); 431 skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
426 432
427 /* Notify and wake up reader process */ 433 /* Notify and wake up reader process */
428 if (tun->flags & TUN_FASYNC) 434 if (tfile->flags & TUN_FASYNC)
429 kill_fasync(&tun->fasync, SIGIO, POLL_IN); 435 kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
430 wake_up_interruptible_poll(&tun->wq.wait, POLLIN | 436 wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
431 POLLRDNORM | POLLRDBAND); 437 POLLRDNORM | POLLRDBAND);
432 return NETDEV_TX_OK; 438 return NETDEV_TX_OK;
433 439
@@ -555,11 +561,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
555 if (!tun) 561 if (!tun)
556 return POLLERR; 562 return POLLERR;
557 563
558 sk = tun->socket.sk; 564 sk = tfile->socket.sk;
559 565
560 tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); 566 tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
561 567
562 poll_wait(file, &tun->wq.wait, wait); 568 poll_wait(file, &tfile->wq.wait, wait);
563 569
564 if (!skb_queue_empty(&sk->sk_receive_queue)) 570 if (!skb_queue_empty(&sk->sk_receive_queue))
565 mask |= POLLIN | POLLRDNORM; 571 mask |= POLLIN | POLLRDNORM;
@@ -578,11 +584,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
578 584
579/* prepad is the amount to reserve at front. len is length after that. 585/* prepad is the amount to reserve at front. len is length after that.
580 * linear is a hint as to how much to copy (usually headers). */ 586 * linear is a hint as to how much to copy (usually headers). */
581static struct sk_buff *tun_alloc_skb(struct tun_struct *tun, 587static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
582 size_t prepad, size_t len, 588 size_t prepad, size_t len,
583 size_t linear, int noblock) 589 size_t linear, int noblock)
584{ 590{
585 struct sock *sk = tun->socket.sk; 591 struct sock *sk = tfile->socket.sk;
586 struct sk_buff *skb; 592 struct sk_buff *skb;
587 int err; 593 int err;
588 594
@@ -682,9 +688,9 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
682} 688}
683 689
684/* Get packet from user space buffer */ 690/* Get packet from user space buffer */
685static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, 691static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
686 const struct iovec *iv, size_t total_len, 692 void *msg_control, const struct iovec *iv,
687 size_t count, int noblock) 693 size_t total_len, size_t count, int noblock)
688{ 694{
689 struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; 695 struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
690 struct sk_buff *skb; 696 struct sk_buff *skb;
@@ -754,7 +760,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
754 } else 760 } else
755 copylen = len; 761 copylen = len;
756 762
757 skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock); 763 skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock);
758 if (IS_ERR(skb)) { 764 if (IS_ERR(skb)) {
759 if (PTR_ERR(skb) != -EAGAIN) 765 if (PTR_ERR(skb) != -EAGAIN)
760 tun->dev->stats.rx_dropped++; 766 tun->dev->stats.rx_dropped++;
@@ -859,6 +865,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
859{ 865{
860 struct file *file = iocb->ki_filp; 866 struct file *file = iocb->ki_filp;
861 struct tun_struct *tun = tun_get(file); 867 struct tun_struct *tun = tun_get(file);
868 struct tun_file *tfile = file->private_data;
862 ssize_t result; 869 ssize_t result;
863 870
864 if (!tun) 871 if (!tun)
@@ -866,8 +873,8 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
866 873
867 tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count); 874 tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
868 875
869 result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count, 876 result = tun_get_user(tun, tfile, NULL, iv, iov_length(iv, count),
870 file->f_flags & O_NONBLOCK); 877 count, file->f_flags & O_NONBLOCK);
871 878
872 tun_put(tun); 879 tun_put(tun);
873 return result; 880 return result;
@@ -875,6 +882,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
875 882
876/* Put packet to the user space buffer */ 883/* Put packet to the user space buffer */
877static ssize_t tun_put_user(struct tun_struct *tun, 884static ssize_t tun_put_user(struct tun_struct *tun,
885 struct tun_file *tfile,
878 struct sk_buff *skb, 886 struct sk_buff *skb,
879 const struct iovec *iv, int len) 887 const struct iovec *iv, int len)
880{ 888{
@@ -954,7 +962,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
954 return total; 962 return total;
955} 963}
956 964
957static ssize_t tun_do_read(struct tun_struct *tun, 965static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
958 struct kiocb *iocb, const struct iovec *iv, 966 struct kiocb *iocb, const struct iovec *iv,
959 ssize_t len, int noblock) 967 ssize_t len, int noblock)
960{ 968{
@@ -965,12 +973,12 @@ static ssize_t tun_do_read(struct tun_struct *tun,
965 tun_debug(KERN_INFO, tun, "tun_chr_read\n"); 973 tun_debug(KERN_INFO, tun, "tun_chr_read\n");
966 974
967 if (unlikely(!noblock)) 975 if (unlikely(!noblock))
968 add_wait_queue(&tun->wq.wait, &wait); 976 add_wait_queue(&tfile->wq.wait, &wait);
969 while (len) { 977 while (len) {
970 current->state = TASK_INTERRUPTIBLE; 978 current->state = TASK_INTERRUPTIBLE;
971 979
972 /* Read frames from the queue */ 980 /* Read frames from the queue */
973 if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { 981 if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) {
974 if (noblock) { 982 if (noblock) {
975 ret = -EAGAIN; 983 ret = -EAGAIN;
976 break; 984 break;
@@ -990,14 +998,14 @@ static ssize_t tun_do_read(struct tun_struct *tun,
990 } 998 }
991 netif_wake_queue(tun->dev); 999 netif_wake_queue(tun->dev);
992 1000
993 ret = tun_put_user(tun, skb, iv, len); 1001 ret = tun_put_user(tun, tfile, skb, iv, len);
994 kfree_skb(skb); 1002 kfree_skb(skb);
995 break; 1003 break;
996 } 1004 }
997 1005
998 current->state = TASK_RUNNING; 1006 current->state = TASK_RUNNING;
999 if (unlikely(!noblock)) 1007 if (unlikely(!noblock))
1000 remove_wait_queue(&tun->wq.wait, &wait); 1008 remove_wait_queue(&tfile->wq.wait, &wait);
1001 1009
1002 return ret; 1010 return ret;
1003} 1011}
@@ -1018,7 +1026,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
1018 goto out; 1026 goto out;
1019 } 1027 }
1020 1028
1021 ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); 1029 ret = tun_do_read(tun, tfile, iocb, iv, len,
1030 file->f_flags & O_NONBLOCK);
1022 ret = min_t(ssize_t, ret, len); 1031 ret = min_t(ssize_t, ret, len);
1023out: 1032out:
1024 tun_put(tun); 1033 tun_put(tun);
@@ -1033,7 +1042,7 @@ static void tun_setup(struct net_device *dev)
1033 tun->group = INVALID_GID; 1042 tun->group = INVALID_GID;
1034 1043
1035 dev->ethtool_ops = &tun_ethtool_ops; 1044 dev->ethtool_ops = &tun_ethtool_ops;
1036 dev->destructor = tun_free_netdev; 1045 dev->destructor = free_netdev;
1037} 1046}
1038 1047
1039/* Trivial set of netlink ops to allow deleting tun or tap 1048/* Trivial set of netlink ops to allow deleting tun or tap
@@ -1053,7 +1062,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
1053 1062
1054static void tun_sock_write_space(struct sock *sk) 1063static void tun_sock_write_space(struct sock *sk)
1055{ 1064{
1056 struct tun_struct *tun; 1065 struct tun_file *tfile;
1057 wait_queue_head_t *wqueue; 1066 wait_queue_head_t *wqueue;
1058 1067
1059 if (!sock_writeable(sk)) 1068 if (!sock_writeable(sk))
@@ -1067,37 +1076,47 @@ static void tun_sock_write_space(struct sock *sk)
1067 wake_up_interruptible_sync_poll(wqueue, POLLOUT | 1076 wake_up_interruptible_sync_poll(wqueue, POLLOUT |
1068 POLLWRNORM | POLLWRBAND); 1077 POLLWRNORM | POLLWRBAND);
1069 1078
1070 tun = tun_sk(sk)->tun; 1079 tfile = container_of(sk, struct tun_file, sk);
1071 kill_fasync(&tun->fasync, SIGIO, POLL_OUT); 1080 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
1072}
1073
1074static void tun_sock_destruct(struct sock *sk)
1075{
1076 free_netdev(tun_sk(sk)->tun->dev);
1077} 1081}
1078 1082
1079static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, 1083static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
1080 struct msghdr *m, size_t total_len) 1084 struct msghdr *m, size_t total_len)
1081{ 1085{
1082 struct tun_struct *tun = container_of(sock, struct tun_struct, socket); 1086 int ret;
1083 return tun_get_user(tun, m->msg_control, m->msg_iov, total_len, 1087 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
1084 m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); 1088 struct tun_struct *tun = __tun_get(tfile);
1089
1090 if (!tun)
1091 return -EBADFD;
1092
1093 ret = tun_get_user(tun, tfile, m->msg_control, m->msg_iov, total_len,
1094 m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
1095 tun_put(tun);
1096 return ret;
1085} 1097}
1086 1098
1099
1087static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, 1100static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
1088 struct msghdr *m, size_t total_len, 1101 struct msghdr *m, size_t total_len,
1089 int flags) 1102 int flags)
1090{ 1103{
1091 struct tun_struct *tun = container_of(sock, struct tun_struct, socket); 1104 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
1105 struct tun_struct *tun = __tun_get(tfile);
1092 int ret; 1106 int ret;
1107
1108 if (!tun)
1109 return -EBADFD;
1110
1093 if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) 1111 if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
1094 return -EINVAL; 1112 return -EINVAL;
1095 ret = tun_do_read(tun, iocb, m->msg_iov, total_len, 1113 ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len,
1096 flags & MSG_DONTWAIT); 1114 flags & MSG_DONTWAIT);
1097 if (ret > total_len) { 1115 if (ret > total_len) {
1098 m->msg_flags |= MSG_TRUNC; 1116 m->msg_flags |= MSG_TRUNC;
1099 ret = flags & MSG_TRUNC ? ret : total_len; 1117 ret = flags & MSG_TRUNC ? ret : total_len;
1100 } 1118 }
1119 tun_put(tun);
1101 return ret; 1120 return ret;
1102} 1121}
1103 1122
@@ -1118,7 +1137,7 @@ static const struct proto_ops tun_socket_ops = {
1118static struct proto tun_proto = { 1137static struct proto tun_proto = {
1119 .name = "tun", 1138 .name = "tun",
1120 .owner = THIS_MODULE, 1139 .owner = THIS_MODULE,
1121 .obj_size = sizeof(struct tun_sock), 1140 .obj_size = sizeof(struct tun_file),
1122}; 1141};
1123 1142
1124static int tun_flags(struct tun_struct *tun) 1143static int tun_flags(struct tun_struct *tun)
@@ -1175,8 +1194,8 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
1175 1194
1176static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) 1195static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1177{ 1196{
1178 struct sock *sk;
1179 struct tun_struct *tun; 1197 struct tun_struct *tun;
1198 struct tun_file *tfile = file->private_data;
1180 struct net_device *dev; 1199 struct net_device *dev;
1181 int err; 1200 int err;
1182 1201
@@ -1197,7 +1216,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1197 (gid_valid(tun->group) && !in_egroup_p(tun->group))) && 1216 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
1198 !capable(CAP_NET_ADMIN)) 1217 !capable(CAP_NET_ADMIN))
1199 return -EPERM; 1218 return -EPERM;
1200 err = security_tun_dev_attach(tun->socket.sk); 1219 err = security_tun_dev_attach(tfile->socket.sk);
1201 if (err < 0) 1220 if (err < 0)
1202 return err; 1221 return err;
1203 1222
@@ -1243,25 +1262,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1243 tun->flags = flags; 1262 tun->flags = flags;
1244 tun->txflt.count = 0; 1263 tun->txflt.count = 0;
1245 tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); 1264 tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
1246 set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags);
1247
1248 err = -ENOMEM;
1249 sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
1250 if (!sk)
1251 goto err_free_dev;
1252 1265
1253 sk_change_net(sk, net); 1266 tun->filter_attached = false;
1254 tun->socket.wq = &tun->wq; 1267 tun->sndbuf = tfile->socket.sk->sk_sndbuf;
1255 init_waitqueue_head(&tun->wq.wait);
1256 tun->socket.ops = &tun_socket_ops;
1257 sock_init_data(&tun->socket, sk);
1258 sk->sk_write_space = tun_sock_write_space;
1259 sk->sk_sndbuf = INT_MAX;
1260 sock_set_flag(sk, SOCK_ZEROCOPY);
1261 1268
1262 tun_sk(sk)->tun = tun; 1269 security_tun_dev_post_create(&tfile->sk);
1263
1264 security_tun_dev_post_create(sk);
1265 1270
1266 tun_net_init(dev); 1271 tun_net_init(dev);
1267 1272
@@ -1271,15 +1276,13 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1271 1276
1272 err = register_netdevice(tun->dev); 1277 err = register_netdevice(tun->dev);
1273 if (err < 0) 1278 if (err < 0)
1274 goto err_free_sk; 1279 goto err_free_dev;
1275 1280
1276 if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) || 1281 if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
1277 device_create_file(&tun->dev->dev, &dev_attr_owner) || 1282 device_create_file(&tun->dev->dev, &dev_attr_owner) ||
1278 device_create_file(&tun->dev->dev, &dev_attr_group)) 1283 device_create_file(&tun->dev->dev, &dev_attr_group))
1279 pr_err("Failed to create tun sysfs files\n"); 1284 pr_err("Failed to create tun sysfs files\n");
1280 1285
1281 sk->sk_destruct = tun_sock_destruct;
1282
1283 err = tun_attach(tun, file); 1286 err = tun_attach(tun, file);
1284 if (err < 0) 1287 if (err < 0)
1285 goto failed; 1288 goto failed;
@@ -1311,8 +1314,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1311 strcpy(ifr->ifr_name, tun->dev->name); 1314 strcpy(ifr->ifr_name, tun->dev->name);
1312 return 0; 1315 return 0;
1313 1316
1314 err_free_sk:
1315 tun_free_netdev(dev);
1316 err_free_dev: 1317 err_free_dev:
1317 free_netdev(dev); 1318 free_netdev(dev);
1318 failed: 1319 failed:
@@ -1376,7 +1377,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1376 struct tun_file *tfile = file->private_data; 1377 struct tun_file *tfile = file->private_data;
1377 struct tun_struct *tun; 1378 struct tun_struct *tun;
1378 void __user* argp = (void __user*)arg; 1379 void __user* argp = (void __user*)arg;
1379 struct sock_fprog fprog;
1380 struct ifreq ifr; 1380 struct ifreq ifr;
1381 kuid_t owner; 1381 kuid_t owner;
1382 kgid_t group; 1382 kgid_t group;
@@ -1441,11 +1441,16 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1441 break; 1441 break;
1442 1442
1443 case TUNSETPERSIST: 1443 case TUNSETPERSIST:
1444 /* Disable/Enable persist mode */ 1444 /* Disable/Enable persist mode. Keep an extra reference to the
1445 if (arg) 1445 * module to prevent the module being unprobed.
1446 */
1447 if (arg) {
1446 tun->flags |= TUN_PERSIST; 1448 tun->flags |= TUN_PERSIST;
1447 else 1449 __module_get(THIS_MODULE);
1450 } else {
1448 tun->flags &= ~TUN_PERSIST; 1451 tun->flags &= ~TUN_PERSIST;
1452 module_put(THIS_MODULE);
1453 }
1449 1454
1450 tun_debug(KERN_INFO, tun, "persist %s\n", 1455 tun_debug(KERN_INFO, tun, "persist %s\n",
1451 arg ? "enabled" : "disabled"); 1456 arg ? "enabled" : "disabled");
@@ -1523,7 +1528,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1523 break; 1528 break;
1524 1529
1525 case TUNGETSNDBUF: 1530 case TUNGETSNDBUF:
1526 sndbuf = tun->socket.sk->sk_sndbuf; 1531 sndbuf = tfile->socket.sk->sk_sndbuf;
1527 if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) 1532 if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
1528 ret = -EFAULT; 1533 ret = -EFAULT;
1529 break; 1534 break;
@@ -1534,7 +1539,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1534 break; 1539 break;
1535 } 1540 }
1536 1541
1537 tun->socket.sk->sk_sndbuf = sndbuf; 1542 tun->sndbuf = tfile->socket.sk->sk_sndbuf = sndbuf;
1538 break; 1543 break;
1539 1544
1540 case TUNGETVNETHDRSZ: 1545 case TUNGETVNETHDRSZ:
@@ -1562,10 +1567,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1562 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) 1567 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1563 break; 1568 break;
1564 ret = -EFAULT; 1569 ret = -EFAULT;
1565 if (copy_from_user(&fprog, argp, sizeof(fprog))) 1570 if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
1566 break; 1571 break;
1567 1572
1568 ret = sk_attach_filter(&fprog, tun->socket.sk); 1573 ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
1574 if (!ret)
1575 tun->filter_attached = true;
1569 break; 1576 break;
1570 1577
1571 case TUNDETACHFILTER: 1578 case TUNDETACHFILTER:
@@ -1573,7 +1580,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1573 ret = -EINVAL; 1580 ret = -EINVAL;
1574 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) 1581 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1575 break; 1582 break;
1576 ret = sk_detach_filter(tun->socket.sk); 1583 ret = sk_detach_filter(tfile->socket.sk);
1584 if (!ret)
1585 tun->filter_attached = false;
1577 break; 1586 break;
1578 1587
1579 default: 1588 default:
@@ -1625,27 +1634,21 @@ static long tun_chr_compat_ioctl(struct file *file,
1625 1634
1626static int tun_chr_fasync(int fd, struct file *file, int on) 1635static int tun_chr_fasync(int fd, struct file *file, int on)
1627{ 1636{
1628 struct tun_struct *tun = tun_get(file); 1637 struct tun_file *tfile = file->private_data;
1629 int ret; 1638 int ret;
1630 1639
1631 if (!tun) 1640 if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
1632 return -EBADFD;
1633
1634 tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on);
1635
1636 if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
1637 goto out; 1641 goto out;
1638 1642
1639 if (on) { 1643 if (on) {
1640 ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); 1644 ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
1641 if (ret) 1645 if (ret)
1642 goto out; 1646 goto out;
1643 tun->flags |= TUN_FASYNC; 1647 tfile->flags |= TUN_FASYNC;
1644 } else 1648 } else
1645 tun->flags &= ~TUN_FASYNC; 1649 tfile->flags &= ~TUN_FASYNC;
1646 ret = 0; 1650 ret = 0;
1647out: 1651out:
1648 tun_put(tun);
1649 return ret; 1652 return ret;
1650} 1653}
1651 1654
@@ -1655,13 +1658,30 @@ static int tun_chr_open(struct inode *inode, struct file * file)
1655 1658
1656 DBG1(KERN_INFO, "tunX: tun_chr_open\n"); 1659 DBG1(KERN_INFO, "tunX: tun_chr_open\n");
1657 1660
1658 tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); 1661 tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL,
1662 &tun_proto);
1659 if (!tfile) 1663 if (!tfile)
1660 return -ENOMEM; 1664 return -ENOMEM;
1661 atomic_set(&tfile->count, 0); 1665 atomic_set(&tfile->count, 0);
1662 tfile->tun = NULL; 1666 tfile->tun = NULL;
1663 tfile->net = get_net(current->nsproxy->net_ns); 1667 tfile->net = get_net(current->nsproxy->net_ns);
1668 tfile->flags = 0;
1669
1670 rcu_assign_pointer(tfile->socket.wq, &tfile->wq);
1671 init_waitqueue_head(&tfile->wq.wait);
1672
1673 tfile->socket.file = file;
1674 tfile->socket.ops = &tun_socket_ops;
1675
1676 sock_init_data(&tfile->socket, &tfile->sk);
1677 sk_change_net(&tfile->sk, tfile->net);
1678
1679 tfile->sk.sk_write_space = tun_sock_write_space;
1680 tfile->sk.sk_sndbuf = INT_MAX;
1681
1664 file->private_data = tfile; 1682 file->private_data = tfile;
1683 set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags);
1684
1665 return 0; 1685 return 0;
1666} 1686}
1667 1687
@@ -1669,6 +1689,7 @@ static int tun_chr_close(struct inode *inode, struct file *file)
1669{ 1689{
1670 struct tun_file *tfile = file->private_data; 1690 struct tun_file *tfile = file->private_data;
1671 struct tun_struct *tun; 1691 struct tun_struct *tun;
1692 struct net *net = tfile->net;
1672 1693
1673 tun = __tun_get(tfile); 1694 tun = __tun_get(tfile);
1674 if (tun) { 1695 if (tun) {
@@ -1685,14 +1706,16 @@ static int tun_chr_close(struct inode *inode, struct file *file)
1685 unregister_netdevice(dev); 1706 unregister_netdevice(dev);
1686 rtnl_unlock(); 1707 rtnl_unlock();
1687 } 1708 }
1688 }
1689 1709
1690 tun = tfile->tun; 1710 /* drop the reference that netdevice holds */
1691 if (tun) 1711 sock_put(&tfile->sk);
1692 sock_put(tun->socket.sk); 1712 }
1693 1713
1694 put_net(tfile->net); 1714 /* drop the reference that file holds */
1695 kfree(tfile); 1715 BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
1716 &tfile->socket.flags));
1717 sk_release_kernel(&tfile->sk);
1718 put_net(net);
1696 1719
1697 return 0; 1720 return 0;
1698} 1721}
@@ -1820,13 +1843,14 @@ static void tun_cleanup(void)
1820struct socket *tun_get_socket(struct file *file) 1843struct socket *tun_get_socket(struct file *file)
1821{ 1844{
1822 struct tun_struct *tun; 1845 struct tun_struct *tun;
1846 struct tun_file *tfile = file->private_data;
1823 if (file->f_op != &tun_fops) 1847 if (file->f_op != &tun_fops)
1824 return ERR_PTR(-EINVAL); 1848 return ERR_PTR(-EINVAL);
1825 tun = tun_get(file); 1849 tun = tun_get(file);
1826 if (!tun) 1850 if (!tun)
1827 return ERR_PTR(-EBADFD); 1851 return ERR_PTR(-EBADFD);
1828 tun_put(tun); 1852 tun_put(tun);
1829 return &tun->socket; 1853 return &tfile->socket;
1830} 1854}
1831EXPORT_SYMBOL_GPL(tun_get_socket); 1855EXPORT_SYMBOL_GPL(tun_get_socket);
1832 1856