aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--drivers/net/tun.c528
1 files changed, 337 insertions, 191 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 09fea31d3e36..4825c52924bf 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -63,6 +63,8 @@
63#include <linux/virtio_net.h> 63#include <linux/virtio_net.h>
64#include <net/net_namespace.h> 64#include <net/net_namespace.h>
65#include <net/netns/generic.h> 65#include <net/netns/generic.h>
66#include <net/rtnetlink.h>
67#include <net/sock.h>
66 68
67#include <asm/system.h> 69#include <asm/system.h>
68#include <asm/uaccess.h> 70#include <asm/uaccess.h>
@@ -87,26 +89,127 @@ struct tap_filter {
87 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; 89 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
88}; 90};
89 91
92struct tun_file {
93 atomic_t count;
94 struct tun_struct *tun;
95 struct net *net;
96 wait_queue_head_t read_wait;
97};
98
99struct tun_sock;
100
90struct tun_struct { 101struct tun_struct {
91 struct list_head list; 102 struct tun_file *tfile;
92 unsigned int flags; 103 unsigned int flags;
93 int attached;
94 uid_t owner; 104 uid_t owner;
95 gid_t group; 105 gid_t group;
96 106
97 wait_queue_head_t read_wait;
98 struct sk_buff_head readq; 107 struct sk_buff_head readq;
99 108
100 struct net_device *dev; 109 struct net_device *dev;
101 struct fasync_struct *fasync; 110 struct fasync_struct *fasync;
102 111
103 struct tap_filter txflt; 112 struct tap_filter txflt;
113 struct sock *sk;
114 struct socket socket;
104 115
105#ifdef TUN_DEBUG 116#ifdef TUN_DEBUG
106 int debug; 117 int debug;
107#endif 118#endif
108}; 119};
109 120
121struct tun_sock {
122 struct sock sk;
123 struct tun_struct *tun;
124};
125
126static inline struct tun_sock *tun_sk(struct sock *sk)
127{
128 return container_of(sk, struct tun_sock, sk);
129}
130
131static int tun_attach(struct tun_struct *tun, struct file *file)
132{
133 struct tun_file *tfile = file->private_data;
134 const struct cred *cred = current_cred();
135 int err;
136
137 ASSERT_RTNL();
138
139 /* Check permissions */
140 if (((tun->owner != -1 && cred->euid != tun->owner) ||
141 (tun->group != -1 && !in_egroup_p(tun->group))) &&
142 !capable(CAP_NET_ADMIN))
143 return -EPERM;
144
145 netif_tx_lock_bh(tun->dev);
146
147 err = -EINVAL;
148 if (tfile->tun)
149 goto out;
150
151 err = -EBUSY;
152 if (tun->tfile)
153 goto out;
154
155 err = 0;
156 tfile->tun = tun;
157 tun->tfile = tfile;
158 dev_hold(tun->dev);
159 atomic_inc(&tfile->count);
160
161out:
162 netif_tx_unlock_bh(tun->dev);
163 return err;
164}
165
166static void __tun_detach(struct tun_struct *tun)
167{
168 struct tun_file *tfile = tun->tfile;
169
170 /* Detach from net device */
171 netif_tx_lock_bh(tun->dev);
172 tfile->tun = NULL;
173 tun->tfile = NULL;
174 netif_tx_unlock_bh(tun->dev);
175
176 /* Drop read queue */
177 skb_queue_purge(&tun->readq);
178
179 /* Drop the extra count on the net device */
180 dev_put(tun->dev);
181}
182
/* Detach the device from its file, taking and releasing RTNL around it. */
static void tun_detach(struct tun_struct *tun)
{
	rtnl_lock();

	__tun_detach(tun);

	rtnl_unlock();
}
189
190static struct tun_struct *__tun_get(struct tun_file *tfile)
191{
192 struct tun_struct *tun = NULL;
193
194 if (atomic_inc_not_zero(&tfile->count))
195 tun = tfile->tun;
196
197 return tun;
198}
199
200static struct tun_struct *tun_get(struct file *file)
201{
202 return __tun_get(file->private_data);
203}
204
205static void tun_put(struct tun_struct *tun)
206{
207 struct tun_file *tfile = tun->tfile;
208
209 if (atomic_dec_and_test(&tfile->count))
210 tun_detach(tfile->tun);
211}
212
110/* TAP filtering */ 213/* TAP filtering */
111static void addr_hash_set(u32 *mask, const u8 *addr) 214static void addr_hash_set(u32 *mask, const u8 *addr)
112{ 215{
@@ -219,13 +322,23 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
219 322
220/* Network device part of the driver */ 323/* Network device part of the driver */
221 324
222static int tun_net_id;
223struct tun_net {
224 struct list_head dev_list;
225};
226
227static const struct ethtool_ops tun_ethtool_ops; 325static const struct ethtool_ops tun_ethtool_ops;
228 326
327/* Net device detach from fd. */
328static void tun_net_uninit(struct net_device *dev)
329{
330 struct tun_struct *tun = netdev_priv(dev);
331 struct tun_file *tfile = tun->tfile;
332
333 /* Inform the methods they need to stop using the dev.
334 */
335 if (tfile) {
336 wake_up_all(&tfile->read_wait);
337 if (atomic_dec_and_test(&tfile->count))
338 __tun_detach(tun);
339 }
340}
341
229/* Net device open. */ 342/* Net device open. */
230static int tun_net_open(struct net_device *dev) 343static int tun_net_open(struct net_device *dev)
231{ 344{
@@ -248,7 +361,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
248 DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); 361 DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
249 362
250 /* Drop packet if interface is not attached */ 363 /* Drop packet if interface is not attached */
251 if (!tun->attached) 364 if (!tun->tfile)
252 goto drop; 365 goto drop;
253 366
254 /* Drop if the filter does not like it. 367 /* Drop if the filter does not like it.
@@ -280,7 +393,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
280 /* Notify and wake up reader process */ 393 /* Notify and wake up reader process */
281 if (tun->flags & TUN_FASYNC) 394 if (tun->flags & TUN_FASYNC)
282 kill_fasync(&tun->fasync, SIGIO, POLL_IN); 395 kill_fasync(&tun->fasync, SIGIO, POLL_IN);
283 wake_up_interruptible(&tun->read_wait); 396 wake_up_interruptible(&tun->tfile->read_wait);
284 return 0; 397 return 0;
285 398
286drop: 399drop:
@@ -312,6 +425,7 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu)
312} 425}
313 426
314static const struct net_device_ops tun_netdev_ops = { 427static const struct net_device_ops tun_netdev_ops = {
428 .ndo_uninit = tun_net_uninit,
315 .ndo_open = tun_net_open, 429 .ndo_open = tun_net_open,
316 .ndo_stop = tun_net_close, 430 .ndo_stop = tun_net_close,
317 .ndo_start_xmit = tun_net_xmit, 431 .ndo_start_xmit = tun_net_xmit,
@@ -319,6 +433,7 @@ static const struct net_device_ops tun_netdev_ops = {
319}; 433};
320 434
321static const struct net_device_ops tap_netdev_ops = { 435static const struct net_device_ops tap_netdev_ops = {
436 .ndo_uninit = tun_net_uninit,
322 .ndo_open = tun_net_open, 437 .ndo_open = tun_net_open,
323 .ndo_stop = tun_net_close, 438 .ndo_stop = tun_net_close,
324 .ndo_start_xmit = tun_net_xmit, 439 .ndo_start_xmit = tun_net_xmit,
@@ -365,86 +480,66 @@ static void tun_net_init(struct net_device *dev)
365/* Poll */ 480/* Poll */
366static unsigned int tun_chr_poll(struct file *file, poll_table * wait) 481static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
367{ 482{
368 struct tun_struct *tun = file->private_data; 483 struct tun_file *tfile = file->private_data;
369 unsigned int mask = POLLOUT | POLLWRNORM; 484 struct tun_struct *tun = __tun_get(tfile);
485 struct sock *sk = tun->sk;
486 unsigned int mask = 0;
370 487
371 if (!tun) 488 if (!tun)
372 return -EBADFD; 489 return POLLERR;
373 490
374 DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); 491 DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
375 492
376 poll_wait(file, &tun->read_wait, wait); 493 poll_wait(file, &tfile->read_wait, wait);
377 494
378 if (!skb_queue_empty(&tun->readq)) 495 if (!skb_queue_empty(&tun->readq))
379 mask |= POLLIN | POLLRDNORM; 496 mask |= POLLIN | POLLRDNORM;
380 497
498 if (sock_writeable(sk) ||
499 (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
500 sock_writeable(sk)))
501 mask |= POLLOUT | POLLWRNORM;
502
503 if (tun->dev->reg_state != NETREG_REGISTERED)
504 mask = POLLERR;
505
506 tun_put(tun);
381 return mask; 507 return mask;
382} 508}
383 509
384/* prepad is the amount to reserve at front. len is length after that. 510/* prepad is the amount to reserve at front. len is length after that.
385 * linear is a hint as to how much to copy (usually headers). */ 511 * linear is a hint as to how much to copy (usually headers). */
386static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear, 512static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
387 gfp_t gfp) 513 size_t prepad, size_t len,
514 size_t linear, int noblock)
388{ 515{
516 struct sock *sk = tun->sk;
389 struct sk_buff *skb; 517 struct sk_buff *skb;
390 unsigned int i; 518 int err;
391
392 skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
393 if (skb) {
394 skb_reserve(skb, prepad);
395 skb_put(skb, len);
396 return skb;
397 }
398 519
399 /* Under a page? Don't bother with paged skb. */ 520 /* Under a page? Don't bother with paged skb. */
400 if (prepad + len < PAGE_SIZE) 521 if (prepad + len < PAGE_SIZE)
401 return NULL; 522 linear = len;
402 523
403 /* Start with a normal skb, and add pages. */ 524 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
404 skb = alloc_skb(prepad + linear, gfp); 525 &err);
405 if (!skb) 526 if (!skb)
406 return NULL; 527 return ERR_PTR(err);
407 528
408 skb_reserve(skb, prepad); 529 skb_reserve(skb, prepad);
409 skb_put(skb, linear); 530 skb_put(skb, linear);
410 531 skb->data_len = len - linear;
411 len -= linear; 532 skb->len += len - linear;
412
413 for (i = 0; i < MAX_SKB_FRAGS; i++) {
414 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
415
416 f->page = alloc_page(gfp|__GFP_ZERO);
417 if (!f->page)
418 break;
419
420 f->page_offset = 0;
421 f->size = PAGE_SIZE;
422
423 skb->data_len += PAGE_SIZE;
424 skb->len += PAGE_SIZE;
425 skb->truesize += PAGE_SIZE;
426 skb_shinfo(skb)->nr_frags++;
427
428 if (len < PAGE_SIZE) {
429 len = 0;
430 break;
431 }
432 len -= PAGE_SIZE;
433 }
434
435 /* Too large, or alloc fail? */
436 if (unlikely(len)) {
437 kfree_skb(skb);
438 skb = NULL;
439 }
440 533
441 return skb; 534 return skb;
442} 535}
443 536
444/* Get packet from user space buffer */ 537/* Get packet from user space buffer */
445static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count) 538static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
539 struct iovec *iv, size_t count,
540 int noblock)
446{ 541{
447 struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) }; 542 struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
448 struct sk_buff *skb; 543 struct sk_buff *skb;
449 size_t len = count, align = 0; 544 size_t len = count, align = 0;
450 struct virtio_net_hdr gso = { 0 }; 545 struct virtio_net_hdr gso = { 0 };
@@ -474,9 +569,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
474 return -EINVAL; 569 return -EINVAL;
475 } 570 }
476 571
477 if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) { 572 skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
478 tun->dev->stats.rx_dropped++; 573 if (IS_ERR(skb)) {
479 return -ENOMEM; 574 if (PTR_ERR(skb) != -EAGAIN)
575 tun->dev->stats.rx_dropped++;
576 return PTR_ERR(skb);
480 } 577 }
481 578
482 if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) { 579 if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
@@ -562,14 +659,20 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
562static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, 659static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
563 unsigned long count, loff_t pos) 660 unsigned long count, loff_t pos)
564{ 661{
565 struct tun_struct *tun = iocb->ki_filp->private_data; 662 struct file *file = iocb->ki_filp;
663 struct tun_struct *tun = file->private_data;
664 ssize_t result;
566 665
567 if (!tun) 666 if (!tun)
568 return -EBADFD; 667 return -EBADFD;
569 668
570 DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count); 669 DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
571 670
572 return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); 671 result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count),
672 file->f_flags & O_NONBLOCK);
673
674 tun_put(tun);
675 return result;
573} 676}
574 677
575/* Put packet to the user space buffer */ 678/* Put packet to the user space buffer */
@@ -642,7 +745,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
642 unsigned long count, loff_t pos) 745 unsigned long count, loff_t pos)
643{ 746{
644 struct file *file = iocb->ki_filp; 747 struct file *file = iocb->ki_filp;
645 struct tun_struct *tun = file->private_data; 748 struct tun_file *tfile = file->private_data;
749 struct tun_struct *tun = __tun_get(tfile);
646 DECLARE_WAITQUEUE(wait, current); 750 DECLARE_WAITQUEUE(wait, current);
647 struct sk_buff *skb; 751 struct sk_buff *skb;
648 ssize_t len, ret = 0; 752 ssize_t len, ret = 0;
@@ -653,10 +757,12 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
653 DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); 757 DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
654 758
655 len = iov_length(iv, count); 759 len = iov_length(iv, count);
656 if (len < 0) 760 if (len < 0) {
657 return -EINVAL; 761 ret = -EINVAL;
762 goto out;
763 }
658 764
659 add_wait_queue(&tun->read_wait, &wait); 765 add_wait_queue(&tfile->read_wait, &wait);
660 while (len) { 766 while (len) {
661 current->state = TASK_INTERRUPTIBLE; 767 current->state = TASK_INTERRUPTIBLE;
662 768
@@ -670,6 +776,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
670 ret = -ERESTARTSYS; 776 ret = -ERESTARTSYS;
671 break; 777 break;
672 } 778 }
779 if (tun->dev->reg_state != NETREG_REGISTERED) {
780 ret = -EIO;
781 break;
782 }
673 783
674 /* Nothing to read, let's sleep */ 784 /* Nothing to read, let's sleep */
675 schedule(); 785 schedule();
@@ -683,8 +793,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
683 } 793 }
684 794
685 current->state = TASK_RUNNING; 795 current->state = TASK_RUNNING;
686 remove_wait_queue(&tun->read_wait, &wait); 796 remove_wait_queue(&tfile->read_wait, &wait);
687 797
798out:
799 tun_put(tun);
688 return ret; 800 return ret;
689} 801}
690 802
@@ -693,54 +805,78 @@ static void tun_setup(struct net_device *dev)
693 struct tun_struct *tun = netdev_priv(dev); 805 struct tun_struct *tun = netdev_priv(dev);
694 806
695 skb_queue_head_init(&tun->readq); 807 skb_queue_head_init(&tun->readq);
696 init_waitqueue_head(&tun->read_wait);
697 808
698 tun->owner = -1; 809 tun->owner = -1;
699 tun->group = -1; 810 tun->group = -1;
700 811
701 dev->ethtool_ops = &tun_ethtool_ops; 812 dev->ethtool_ops = &tun_ethtool_ops;
702 dev->destructor = free_netdev; 813 dev->destructor = free_netdev;
703 dev->features |= NETIF_F_NETNS_LOCAL;
704} 814}
705 815
706static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name) 816/* Trivial set of netlink ops to allow deleting tun or tap
817 * device with netlink.
818 */
819static int tun_validate(struct nlattr *tb[], struct nlattr *data[])
820{
821 return -EINVAL;
822}
823
824static struct rtnl_link_ops tun_link_ops __read_mostly = {
825 .kind = DRV_NAME,
826 .priv_size = sizeof(struct tun_struct),
827 .setup = tun_setup,
828 .validate = tun_validate,
829};
830
831static void tun_sock_write_space(struct sock *sk)
707{ 832{
708 struct tun_struct *tun; 833 struct tun_struct *tun;
709 834
710 ASSERT_RTNL(); 835 if (!sock_writeable(sk))
711 list_for_each_entry(tun, &tn->dev_list, list) { 836 return;
712 if (!strncmp(tun->dev->name, name, IFNAMSIZ)) 837
713 return tun; 838 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
714 } 839 wake_up_interruptible_sync(sk->sk_sleep);
840
841 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
842 return;
715 843
716 return NULL; 844 tun = container_of(sk, struct tun_sock, sk)->tun;
845 kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
717} 846}
718 847
848static void tun_sock_destruct(struct sock *sk)
849{
850 dev_put(container_of(sk, struct tun_sock, sk)->tun->dev);
851}
852
853static struct proto tun_proto = {
854 .name = "tun",
855 .owner = THIS_MODULE,
856 .obj_size = sizeof(struct tun_sock),
857};
858
719static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) 859static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
720{ 860{
721 struct tun_net *tn; 861 struct sock *sk;
722 struct tun_struct *tun; 862 struct tun_struct *tun;
723 struct net_device *dev; 863 struct net_device *dev;
724 const struct cred *cred = current_cred(); 864 struct tun_file *tfile = file->private_data;
725 int err; 865 int err;
726 866
727 tn = net_generic(net, tun_net_id); 867 dev = __dev_get_by_name(net, ifr->ifr_name);
728 tun = tun_get_by_name(tn, ifr->ifr_name); 868 if (dev) {
729 if (tun) { 869 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
730 if (tun->attached) 870 tun = netdev_priv(dev);
731 return -EBUSY; 871 else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
732 872 tun = netdev_priv(dev);
733 /* Check permissions */ 873 else
734 if (((tun->owner != -1 && 874 return -EINVAL;
735 cred->euid != tun->owner) || 875
736 (tun->group != -1 && 876 err = tun_attach(tun, file);
737 cred->egid != tun->group)) && 877 if (err < 0)
738 !capable(CAP_NET_ADMIN)) { 878 return err;
739 return -EPERM;
740 }
741 } 879 }
742 else if (__dev_get_by_name(net, ifr->ifr_name))
743 return -EINVAL;
744 else { 880 else {
745 char *name; 881 char *name;
746 unsigned long flags = 0; 882 unsigned long flags = 0;
@@ -771,25 +907,45 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
771 return -ENOMEM; 907 return -ENOMEM;
772 908
773 dev_net_set(dev, net); 909 dev_net_set(dev, net);
910 dev->rtnl_link_ops = &tun_link_ops;
774 911
775 tun = netdev_priv(dev); 912 tun = netdev_priv(dev);
776 tun->dev = dev; 913 tun->dev = dev;
777 tun->flags = flags; 914 tun->flags = flags;
778 tun->txflt.count = 0; 915 tun->txflt.count = 0;
779 916
917 err = -ENOMEM;
918 sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
919 if (!sk)
920 goto err_free_dev;
921
922 /* This ref count is for tun->sk. */
923 dev_hold(dev);
924 sock_init_data(&tun->socket, sk);
925 sk->sk_write_space = tun_sock_write_space;
926 sk->sk_destruct = tun_sock_destruct;
927 sk->sk_sndbuf = INT_MAX;
928 sk->sk_sleep = &tfile->read_wait;
929
930 tun->sk = sk;
931 container_of(sk, struct tun_sock, sk)->tun = tun;
932
780 tun_net_init(dev); 933 tun_net_init(dev);
781 934
782 if (strchr(dev->name, '%')) { 935 if (strchr(dev->name, '%')) {
783 err = dev_alloc_name(dev, dev->name); 936 err = dev_alloc_name(dev, dev->name);
784 if (err < 0) 937 if (err < 0)
785 goto err_free_dev; 938 goto err_free_sk;
786 } 939 }
787 940
941 err = -EINVAL;
788 err = register_netdevice(tun->dev); 942 err = register_netdevice(tun->dev);
789 if (err < 0) 943 if (err < 0)
790 goto err_free_dev; 944 goto err_free_dev;
791 945
792 list_add(&tun->list, &tn->dev_list); 946 err = tun_attach(tun, file);
947 if (err < 0)
948 goto err_free_dev;
793 } 949 }
794 950
795 DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name); 951 DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
@@ -809,10 +965,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
809 else 965 else
810 tun->flags &= ~TUN_VNET_HDR; 966 tun->flags &= ~TUN_VNET_HDR;
811 967
812 file->private_data = tun;
813 tun->attached = 1;
814 get_net(dev_net(tun->dev));
815
816 /* Make sure persistent devices do not get stuck in 968 /* Make sure persistent devices do not get stuck in
817 * xoff state. 969 * xoff state.
818 */ 970 */
@@ -822,6 +974,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
822 strcpy(ifr->ifr_name, tun->dev->name); 974 strcpy(ifr->ifr_name, tun->dev->name);
823 return 0; 975 return 0;
824 976
977 err_free_sk:
978 sock_put(sk);
825 err_free_dev: 979 err_free_dev:
826 free_netdev(dev); 980 free_netdev(dev);
827 failed: 981 failed:
@@ -830,7 +984,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
830 984
831static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr) 985static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
832{ 986{
833 struct tun_struct *tun = file->private_data; 987 struct tun_struct *tun = tun_get(file);
834 988
835 if (!tun) 989 if (!tun)
836 return -EBADFD; 990 return -EBADFD;
@@ -855,6 +1009,7 @@ static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
855 if (tun->flags & TUN_VNET_HDR) 1009 if (tun->flags & TUN_VNET_HDR)
856 ifr->ifr_flags |= IFF_VNET_HDR; 1010 ifr->ifr_flags |= IFF_VNET_HDR;
857 1011
1012 tun_put(tun);
858 return 0; 1013 return 0;
859} 1014}
860 1015
@@ -901,22 +1056,34 @@ static int set_offload(struct net_device *dev, unsigned long arg)
901static int tun_chr_ioctl(struct inode *inode, struct file *file, 1056static int tun_chr_ioctl(struct inode *inode, struct file *file,
902 unsigned int cmd, unsigned long arg) 1057 unsigned int cmd, unsigned long arg)
903{ 1058{
904 struct tun_struct *tun = file->private_data; 1059 struct tun_file *tfile = file->private_data;
1060 struct tun_struct *tun;
905 void __user* argp = (void __user*)arg; 1061 void __user* argp = (void __user*)arg;
906 struct ifreq ifr; 1062 struct ifreq ifr;
1063 int sndbuf;
907 int ret; 1064 int ret;
908 1065
909 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) 1066 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
910 if (copy_from_user(&ifr, argp, sizeof ifr)) 1067 if (copy_from_user(&ifr, argp, sizeof ifr))
911 return -EFAULT; 1068 return -EFAULT;
912 1069
1070 if (cmd == TUNGETFEATURES) {
1071 /* Currently this just means: "what IFF flags are valid?".
1072 * This is needed because we never checked for invalid flags on
1073 * TUNSETIFF. */
1074 return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
1075 IFF_VNET_HDR,
1076 (unsigned int __user*)argp);
1077 }
1078
1079 tun = __tun_get(tfile);
913 if (cmd == TUNSETIFF && !tun) { 1080 if (cmd == TUNSETIFF && !tun) {
914 int err; 1081 int err;
915 1082
916 ifr.ifr_name[IFNAMSIZ-1] = '\0'; 1083 ifr.ifr_name[IFNAMSIZ-1] = '\0';
917 1084
918 rtnl_lock(); 1085 rtnl_lock();
919 err = tun_set_iff(current->nsproxy->net_ns, file, &ifr); 1086 err = tun_set_iff(tfile->net, file, &ifr);
920 rtnl_unlock(); 1087 rtnl_unlock();
921 1088
922 if (err) 1089 if (err)
@@ -927,28 +1094,21 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
927 return 0; 1094 return 0;
928 } 1095 }
929 1096
930 if (cmd == TUNGETFEATURES) {
931 /* Currently this just means: "what IFF flags are valid?".
932 * This is needed because we never checked for invalid flags on
933 * TUNSETIFF. */
934 return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
935 IFF_VNET_HDR,
936 (unsigned int __user*)argp);
937 }
938 1097
939 if (!tun) 1098 if (!tun)
940 return -EBADFD; 1099 return -EBADFD;
941 1100
942 DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd); 1101 DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
943 1102
1103 ret = 0;
944 switch (cmd) { 1104 switch (cmd) {
945 case TUNGETIFF: 1105 case TUNGETIFF:
946 ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr); 1106 ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
947 if (ret) 1107 if (ret)
948 return ret; 1108 break;
949 1109
950 if (copy_to_user(argp, &ifr, sizeof(ifr))) 1110 if (copy_to_user(argp, &ifr, sizeof(ifr)))
951 return -EFAULT; 1111 ret = -EFAULT;
952 break; 1112 break;
953 1113
954 case TUNSETNOCSUM: 1114 case TUNSETNOCSUM:
@@ -1000,7 +1160,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
1000 ret = 0; 1160 ret = 0;
1001 } 1161 }
1002 rtnl_unlock(); 1162 rtnl_unlock();
1003 return ret; 1163 break;
1004 1164
1005#ifdef TUN_DEBUG 1165#ifdef TUN_DEBUG
1006 case TUNSETDEBUG: 1166 case TUNSETDEBUG:
@@ -1011,24 +1171,25 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
1011 rtnl_lock(); 1171 rtnl_lock();
1012 ret = set_offload(tun->dev, arg); 1172 ret = set_offload(tun->dev, arg);
1013 rtnl_unlock(); 1173 rtnl_unlock();
1014 return ret; 1174 break;
1015 1175
1016 case TUNSETTXFILTER: 1176 case TUNSETTXFILTER:
1017 /* Can be set only for TAPs */ 1177 /* Can be set only for TAPs */
1178 ret = -EINVAL;
1018 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) 1179 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1019 return -EINVAL; 1180 break;
1020 rtnl_lock(); 1181 rtnl_lock();
1021 ret = update_filter(&tun->txflt, (void __user *)arg); 1182 ret = update_filter(&tun->txflt, (void __user *)arg);
1022 rtnl_unlock(); 1183 rtnl_unlock();
1023 return ret; 1184 break;
1024 1185
1025 case SIOCGIFHWADDR: 1186 case SIOCGIFHWADDR:
1026 /* Get hw addres */ 1187 /* Get hw addres */
1027 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); 1188 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
1028 ifr.ifr_hwaddr.sa_family = tun->dev->type; 1189 ifr.ifr_hwaddr.sa_family = tun->dev->type;
1029 if (copy_to_user(argp, &ifr, sizeof ifr)) 1190 if (copy_to_user(argp, &ifr, sizeof ifr))
1030 return -EFAULT; 1191 ret = -EFAULT;
1031 return 0; 1192 break;
1032 1193
1033 case SIOCSIFHWADDR: 1194 case SIOCSIFHWADDR:
1034 /* Set hw address */ 1195 /* Set hw address */
@@ -1038,18 +1199,35 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
1038 rtnl_lock(); 1199 rtnl_lock();
1039 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); 1200 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
1040 rtnl_unlock(); 1201 rtnl_unlock();
1041 return ret; 1202 break;
1203
1204 case TUNGETSNDBUF:
1205 sndbuf = tun->sk->sk_sndbuf;
1206 if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
1207 ret = -EFAULT;
1208 break;
1209
1210 case TUNSETSNDBUF:
1211 if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
1212 ret = -EFAULT;
1213 break;
1214 }
1215
1216 tun->sk->sk_sndbuf = sndbuf;
1217 break;
1042 1218
1043 default: 1219 default:
1044 return -EINVAL; 1220 ret = -EINVAL;
1221 break;
1045 }; 1222 };
1046 1223
1047 return 0; 1224 tun_put(tun);
1225 return ret;
1048} 1226}
1049 1227
1050static int tun_chr_fasync(int fd, struct file *file, int on) 1228static int tun_chr_fasync(int fd, struct file *file, int on)
1051{ 1229{
1052 struct tun_struct *tun = file->private_data; 1230 struct tun_struct *tun = tun_get(file);
1053 int ret; 1231 int ret;
1054 1232
1055 if (!tun) 1233 if (!tun)
@@ -1071,42 +1249,50 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
1071 ret = 0; 1249 ret = 0;
1072out: 1250out:
1073 unlock_kernel(); 1251 unlock_kernel();
1252 tun_put(tun);
1074 return ret; 1253 return ret;
1075} 1254}
1076 1255
1077static int tun_chr_open(struct inode *inode, struct file * file) 1256static int tun_chr_open(struct inode *inode, struct file * file)
1078{ 1257{
1258 struct tun_file *tfile;
1079 cycle_kernel_lock(); 1259 cycle_kernel_lock();
1080 DBG1(KERN_INFO "tunX: tun_chr_open\n"); 1260 DBG1(KERN_INFO "tunX: tun_chr_open\n");
1081 file->private_data = NULL; 1261
1262 tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
1263 if (!tfile)
1264 return -ENOMEM;
1265 atomic_set(&tfile->count, 0);
1266 tfile->tun = NULL;
1267 tfile->net = get_net(current->nsproxy->net_ns);
1268 init_waitqueue_head(&tfile->read_wait);
1269 file->private_data = tfile;
1082 return 0; 1270 return 0;
1083} 1271}
1084 1272
1085static int tun_chr_close(struct inode *inode, struct file *file) 1273static int tun_chr_close(struct inode *inode, struct file *file)
1086{ 1274{
1087 struct tun_struct *tun = file->private_data; 1275 struct tun_file *tfile = file->private_data;
1088 1276 struct tun_struct *tun = __tun_get(tfile);
1089 if (!tun)
1090 return 0;
1091 1277
1092 DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
1093 1278
1094 rtnl_lock(); 1279 if (tun) {
1280 DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
1095 1281
1096 /* Detach from net device */ 1282 rtnl_lock();
1097 file->private_data = NULL; 1283 __tun_detach(tun);
1098 tun->attached = 0;
1099 put_net(dev_net(tun->dev));
1100 1284
1101 /* Drop read queue */ 1285 /* If desireable, unregister the netdevice. */
1102 skb_queue_purge(&tun->readq); 1286 if (!(tun->flags & TUN_PERSIST)) {
1287 sock_put(tun->sk);
1288 unregister_netdevice(tun->dev);
1289 }
1103 1290
1104 if (!(tun->flags & TUN_PERSIST)) { 1291 rtnl_unlock();
1105 list_del(&tun->list);
1106 unregister_netdevice(tun->dev);
1107 } 1292 }
1108 1293
1109 rtnl_unlock(); 1294 put_net(tfile->net);
1295 kfree(tfile);
1110 1296
1111 return 0; 1297 return 0;
1112} 1298}
@@ -1187,7 +1373,7 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
1187static u32 tun_get_link(struct net_device *dev) 1373static u32 tun_get_link(struct net_device *dev)
1188{ 1374{
1189 struct tun_struct *tun = netdev_priv(dev); 1375 struct tun_struct *tun = netdev_priv(dev);
1190 return tun->attached; 1376 return !!tun->tfile;
1191} 1377}
1192 1378
1193static u32 tun_get_rx_csum(struct net_device *dev) 1379static u32 tun_get_rx_csum(struct net_device *dev)
@@ -1216,45 +1402,6 @@ static const struct ethtool_ops tun_ethtool_ops = {
1216 .set_rx_csum = tun_set_rx_csum 1402 .set_rx_csum = tun_set_rx_csum
1217}; 1403};
1218 1404
1219static int tun_init_net(struct net *net)
1220{
1221 struct tun_net *tn;
1222
1223 tn = kmalloc(sizeof(*tn), GFP_KERNEL);
1224 if (tn == NULL)
1225 return -ENOMEM;
1226
1227 INIT_LIST_HEAD(&tn->dev_list);
1228
1229 if (net_assign_generic(net, tun_net_id, tn)) {
1230 kfree(tn);
1231 return -ENOMEM;
1232 }
1233
1234 return 0;
1235}
1236
1237static void tun_exit_net(struct net *net)
1238{
1239 struct tun_net *tn;
1240 struct tun_struct *tun, *nxt;
1241
1242 tn = net_generic(net, tun_net_id);
1243
1244 rtnl_lock();
1245 list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) {
1246 DBG(KERN_INFO "%s cleaned up\n", tun->dev->name);
1247 unregister_netdevice(tun->dev);
1248 }
1249 rtnl_unlock();
1250
1251 kfree(tn);
1252}
1253
1254static struct pernet_operations tun_net_ops = {
1255 .init = tun_init_net,
1256 .exit = tun_exit_net,
1257};
1258 1405
1259static int __init tun_init(void) 1406static int __init tun_init(void)
1260{ 1407{
@@ -1263,10 +1410,10 @@ static int __init tun_init(void)
1263 printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); 1410 printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
1264 printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT); 1411 printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
1265 1412
1266 ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops); 1413 ret = rtnl_link_register(&tun_link_ops);
1267 if (ret) { 1414 if (ret) {
1268 printk(KERN_ERR "tun: Can't register pernet ops\n"); 1415 printk(KERN_ERR "tun: Can't register link_ops\n");
1269 goto err_pernet; 1416 goto err_linkops;
1270 } 1417 }
1271 1418
1272 ret = misc_register(&tun_miscdev); 1419 ret = misc_register(&tun_miscdev);
@@ -1274,18 +1421,17 @@ static int __init tun_init(void)
1274 printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR); 1421 printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
1275 goto err_misc; 1422 goto err_misc;
1276 } 1423 }
1277 return 0; 1424 return 0;
1278
1279err_misc: 1425err_misc:
1280 unregister_pernet_gen_device(tun_net_id, &tun_net_ops); 1426 rtnl_link_unregister(&tun_link_ops);
1281err_pernet: 1427err_linkops:
1282 return ret; 1428 return ret;
1283} 1429}
1284 1430
1285static void tun_cleanup(void) 1431static void tun_cleanup(void)
1286{ 1432{
1287 misc_deregister(&tun_miscdev); 1433 misc_deregister(&tun_miscdev);
1288 unregister_pernet_gen_device(tun_net_id, &tun_net_ops); 1434 rtnl_link_unregister(&tun_link_ops);
1289} 1435}
1290 1436
1291module_init(tun_init); 1437module_init(tun_init);