diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 89 |
1 file changed, 54 insertions, 35 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fbd106edbe59..cc09b67c23bc 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -109,11 +109,11 @@ struct tap_filter { | |||
109 | unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; | 109 | unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; |
110 | }; | 110 | }; |
111 | 111 | ||
112 | /* 1024 is probably a high enough limit: modern hypervisors seem to support on | 112 | /* DEFAULT_MAX_NUM_RSS_QUEUES was chosen to let the rx/tx queues allocated for |
113 | * the order of 100-200 CPUs so this leaves us some breathing space if we want | 113 | * the netdevice fit in one page, so we can make sure the memory allocation |
114 | * to match a queue per guest CPU. | 114 | * succeeds. TODO: increase the limit. */ |
115 | */ | 115 | #define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES |
116 | #define MAX_TAP_QUEUES 1024 | 116 | #define MAX_TAP_FLOWS 4096 |
117 | 117 | ||
118 | #define TUN_FLOW_EXPIRE (3 * HZ) | 118 | #define TUN_FLOW_EXPIRE (3 * HZ) |
119 | 119 | ||
@@ -185,6 +185,8 @@ struct tun_struct { | |||
185 | unsigned long ageing_time; | 185 | unsigned long ageing_time; |
186 | unsigned int numdisabled; | 186 | unsigned int numdisabled; |
187 | struct list_head disabled; | 187 | struct list_head disabled; |
188 | void *security; | ||
189 | u32 flow_count; | ||
188 | }; | 190 | }; |
189 | 191 | ||
190 | static inline u32 tun_hashfn(u32 rxhash) | 192 | static inline u32 tun_hashfn(u32 rxhash) |
@@ -218,6 +220,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, | |||
218 | e->queue_index = queue_index; | 220 | e->queue_index = queue_index; |
219 | e->tun = tun; | 221 | e->tun = tun; |
220 | hlist_add_head_rcu(&e->hash_link, head); | 222 | hlist_add_head_rcu(&e->hash_link, head); |
223 | ++tun->flow_count; | ||
221 | } | 224 | } |
222 | return e; | 225 | return e; |
223 | } | 226 | } |
@@ -228,6 +231,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) | |||
228 | e->rxhash, e->queue_index); | 231 | e->rxhash, e->queue_index); |
229 | hlist_del_rcu(&e->hash_link); | 232 | hlist_del_rcu(&e->hash_link); |
230 | kfree_rcu(e, rcu); | 233 | kfree_rcu(e, rcu); |
234 | --tun->flow_count; | ||
231 | } | 235 | } |
232 | 236 | ||
233 | static void tun_flow_flush(struct tun_struct *tun) | 237 | static void tun_flow_flush(struct tun_struct *tun) |
@@ -317,7 +321,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, | |||
317 | e->updated = jiffies; | 321 | e->updated = jiffies; |
318 | } else { | 322 | } else { |
319 | spin_lock_bh(&tun->lock); | 323 | spin_lock_bh(&tun->lock); |
320 | if (!tun_flow_find(head, rxhash)) | 324 | if (!tun_flow_find(head, rxhash) && |
325 | tun->flow_count < MAX_TAP_FLOWS) | ||
321 | tun_flow_create(tun, head, rxhash, queue_index); | 326 | tun_flow_create(tun, head, rxhash, queue_index); |
322 | 327 | ||
323 | if (!timer_pending(&tun->flow_gc_timer)) | 328 | if (!timer_pending(&tun->flow_gc_timer)) |
@@ -404,8 +409,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) | |||
404 | struct tun_struct *tun; | 409 | struct tun_struct *tun; |
405 | struct net_device *dev; | 410 | struct net_device *dev; |
406 | 411 | ||
407 | tun = rcu_dereference_protected(tfile->tun, | 412 | tun = rtnl_dereference(tfile->tun); |
408 | lockdep_rtnl_is_held()); | 413 | |
409 | if (tun) { | 414 | if (tun) { |
410 | u16 index = tfile->queue_index; | 415 | u16 index = tfile->queue_index; |
411 | BUG_ON(index >= tun->numqueues); | 416 | BUG_ON(index >= tun->numqueues); |
@@ -414,8 +419,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) | |||
414 | rcu_assign_pointer(tun->tfiles[index], | 419 | rcu_assign_pointer(tun->tfiles[index], |
415 | tun->tfiles[tun->numqueues - 1]); | 420 | tun->tfiles[tun->numqueues - 1]); |
416 | rcu_assign_pointer(tfile->tun, NULL); | 421 | rcu_assign_pointer(tfile->tun, NULL); |
417 | ntfile = rcu_dereference_protected(tun->tfiles[index], | 422 | ntfile = rtnl_dereference(tun->tfiles[index]); |
418 | lockdep_rtnl_is_held()); | ||
419 | ntfile->queue_index = index; | 423 | ntfile->queue_index = index; |
420 | 424 | ||
421 | --tun->numqueues; | 425 | --tun->numqueues; |
@@ -429,8 +433,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) | |||
429 | /* Drop read queue */ | 433 | /* Drop read queue */ |
430 | skb_queue_purge(&tfile->sk.sk_receive_queue); | 434 | skb_queue_purge(&tfile->sk.sk_receive_queue); |
431 | tun_set_real_num_queues(tun); | 435 | tun_set_real_num_queues(tun); |
432 | } else if (tfile->detached && clean) | 436 | } else if (tfile->detached && clean) { |
433 | tun = tun_enable_queue(tfile); | 437 | tun = tun_enable_queue(tfile); |
438 | sock_put(&tfile->sk); | ||
439 | } | ||
434 | 440 | ||
435 | if (clean) { | 441 | if (clean) { |
436 | if (tun && tun->numqueues == 0 && tun->numdisabled == 0 && | 442 | if (tun && tun->numqueues == 0 && tun->numdisabled == 0 && |
@@ -458,8 +464,7 @@ static void tun_detach_all(struct net_device *dev) | |||
458 | int i, n = tun->numqueues; | 464 | int i, n = tun->numqueues; |
459 | 465 | ||
460 | for (i = 0; i < n; i++) { | 466 | for (i = 0; i < n; i++) { |
461 | tfile = rcu_dereference_protected(tun->tfiles[i], | 467 | tfile = rtnl_dereference(tun->tfiles[i]); |
462 | lockdep_rtnl_is_held()); | ||
463 | BUG_ON(!tfile); | 468 | BUG_ON(!tfile); |
464 | wake_up_all(&tfile->wq.wait); | 469 | wake_up_all(&tfile->wq.wait); |
465 | rcu_assign_pointer(tfile->tun, NULL); | 470 | rcu_assign_pointer(tfile->tun, NULL); |
@@ -469,8 +474,7 @@ static void tun_detach_all(struct net_device *dev) | |||
469 | 474 | ||
470 | synchronize_net(); | 475 | synchronize_net(); |
471 | for (i = 0; i < n; i++) { | 476 | for (i = 0; i < n; i++) { |
472 | tfile = rcu_dereference_protected(tun->tfiles[i], | 477 | tfile = rtnl_dereference(tun->tfiles[i]); |
473 | lockdep_rtnl_is_held()); | ||
474 | /* Drop read queue */ | 478 | /* Drop read queue */ |
475 | skb_queue_purge(&tfile->sk.sk_receive_queue); | 479 | skb_queue_purge(&tfile->sk.sk_receive_queue); |
476 | sock_put(&tfile->sk); | 480 | sock_put(&tfile->sk); |
@@ -481,6 +485,9 @@ static void tun_detach_all(struct net_device *dev) | |||
481 | sock_put(&tfile->sk); | 485 | sock_put(&tfile->sk); |
482 | } | 486 | } |
483 | BUG_ON(tun->numdisabled != 0); | 487 | BUG_ON(tun->numdisabled != 0); |
488 | |||
489 | if (tun->flags & TUN_PERSIST) | ||
490 | module_put(THIS_MODULE); | ||
484 | } | 491 | } |
485 | 492 | ||
486 | static int tun_attach(struct tun_struct *tun, struct file *file) | 493 | static int tun_attach(struct tun_struct *tun, struct file *file) |
@@ -488,8 +495,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file) | |||
488 | struct tun_file *tfile = file->private_data; | 495 | struct tun_file *tfile = file->private_data; |
489 | int err; | 496 | int err; |
490 | 497 | ||
498 | err = security_tun_dev_attach(tfile->socket.sk, tun->security); | ||
499 | if (err < 0) | ||
500 | goto out; | ||
501 | |||
491 | err = -EINVAL; | 502 | err = -EINVAL; |
492 | if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held())) | 503 | if (rtnl_dereference(tfile->tun)) |
493 | goto out; | 504 | goto out; |
494 | 505 | ||
495 | err = -EBUSY; | 506 | err = -EBUSY; |
@@ -1371,6 +1382,7 @@ static void tun_free_netdev(struct net_device *dev) | |||
1371 | 1382 | ||
1372 | BUG_ON(!(list_empty(&tun->disabled))); | 1383 | BUG_ON(!(list_empty(&tun->disabled))); |
1373 | tun_flow_uninit(tun); | 1384 | tun_flow_uninit(tun); |
1385 | security_tun_dev_free_security(tun->security); | ||
1374 | free_netdev(dev); | 1386 | free_netdev(dev); |
1375 | } | 1387 | } |
1376 | 1388 | ||
@@ -1544,6 +1556,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1544 | struct net_device *dev; | 1556 | struct net_device *dev; |
1545 | int err; | 1557 | int err; |
1546 | 1558 | ||
1559 | if (tfile->detached) | ||
1560 | return -EINVAL; | ||
1561 | |||
1547 | dev = __dev_get_by_name(net, ifr->ifr_name); | 1562 | dev = __dev_get_by_name(net, ifr->ifr_name); |
1548 | if (dev) { | 1563 | if (dev) { |
1549 | if (ifr->ifr_flags & IFF_TUN_EXCL) | 1564 | if (ifr->ifr_flags & IFF_TUN_EXCL) |
@@ -1557,7 +1572,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1557 | 1572 | ||
1558 | if (tun_not_capable(tun)) | 1573 | if (tun_not_capable(tun)) |
1559 | return -EPERM; | 1574 | return -EPERM; |
1560 | err = security_tun_dev_attach(tfile->socket.sk); | 1575 | err = security_tun_dev_open(tun->security); |
1561 | if (err < 0) | 1576 | if (err < 0) |
1562 | return err; | 1577 | return err; |
1563 | 1578 | ||
@@ -1572,6 +1587,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1572 | else { | 1587 | else { |
1573 | char *name; | 1588 | char *name; |
1574 | unsigned long flags = 0; | 1589 | unsigned long flags = 0; |
1590 | int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? | ||
1591 | MAX_TAP_QUEUES : 1; | ||
1575 | 1592 | ||
1576 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | 1593 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
1577 | return -EPERM; | 1594 | return -EPERM; |
@@ -1595,8 +1612,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1595 | name = ifr->ifr_name; | 1612 | name = ifr->ifr_name; |
1596 | 1613 | ||
1597 | dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, | 1614 | dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, |
1598 | tun_setup, | 1615 | tun_setup, queues, queues); |
1599 | MAX_TAP_QUEUES, MAX_TAP_QUEUES); | 1616 | |
1600 | if (!dev) | 1617 | if (!dev) |
1601 | return -ENOMEM; | 1618 | return -ENOMEM; |
1602 | 1619 | ||
@@ -1614,7 +1631,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
1614 | 1631 | ||
1615 | spin_lock_init(&tun->lock); | 1632 | spin_lock_init(&tun->lock); |
1616 | 1633 | ||
1617 | security_tun_dev_post_create(&tfile->sk); | 1634 | err = security_tun_dev_alloc_security(&tun->security); |
1635 | if (err < 0) | ||
1636 | goto err_free_dev; | ||
1618 | 1637 | ||
1619 | tun_net_init(dev); | 1638 | tun_net_init(dev); |
1620 | 1639 | ||
@@ -1738,8 +1757,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n) | |||
1738 | struct tun_file *tfile; | 1757 | struct tun_file *tfile; |
1739 | 1758 | ||
1740 | for (i = 0; i < n; i++) { | 1759 | for (i = 0; i < n; i++) { |
1741 | tfile = rcu_dereference_protected(tun->tfiles[i], | 1760 | tfile = rtnl_dereference(tun->tfiles[i]); |
1742 | lockdep_rtnl_is_held()); | ||
1743 | sk_detach_filter(tfile->socket.sk); | 1761 | sk_detach_filter(tfile->socket.sk); |
1744 | } | 1762 | } |
1745 | 1763 | ||
@@ -1752,8 +1770,7 @@ static int tun_attach_filter(struct tun_struct *tun) | |||
1752 | struct tun_file *tfile; | 1770 | struct tun_file *tfile; |
1753 | 1771 | ||
1754 | for (i = 0; i < tun->numqueues; i++) { | 1772 | for (i = 0; i < tun->numqueues; i++) { |
1755 | tfile = rcu_dereference_protected(tun->tfiles[i], | 1773 | tfile = rtnl_dereference(tun->tfiles[i]); |
1756 | lockdep_rtnl_is_held()); | ||
1757 | ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); | 1774 | ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); |
1758 | if (ret) { | 1775 | if (ret) { |
1759 | tun_detach_filter(tun, i); | 1776 | tun_detach_filter(tun, i); |
@@ -1771,8 +1788,7 @@ static void tun_set_sndbuf(struct tun_struct *tun) | |||
1771 | int i; | 1788 | int i; |
1772 | 1789 | ||
1773 | for (i = 0; i < tun->numqueues; i++) { | 1790 | for (i = 0; i < tun->numqueues; i++) { |
1774 | tfile = rcu_dereference_protected(tun->tfiles[i], | 1791 | tfile = rtnl_dereference(tun->tfiles[i]); |
1775 | lockdep_rtnl_is_held()); | ||
1776 | tfile->socket.sk->sk_sndbuf = tun->sndbuf; | 1792 | tfile->socket.sk->sk_sndbuf = tun->sndbuf; |
1777 | } | 1793 | } |
1778 | } | 1794 | } |
@@ -1787,15 +1803,16 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) | |||
1787 | 1803 | ||
1788 | if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { | 1804 | if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { |
1789 | tun = tfile->detached; | 1805 | tun = tfile->detached; |
1790 | if (!tun) | 1806 | if (!tun) { |
1791 | ret = -EINVAL; | 1807 | ret = -EINVAL; |
1792 | else if (tun_not_capable(tun)) | 1808 | goto unlock; |
1793 | ret = -EPERM; | 1809 | } |
1794 | else | 1810 | ret = security_tun_dev_attach_queue(tun->security); |
1795 | ret = tun_attach(tun, file); | 1811 | if (ret < 0) |
1812 | goto unlock; | ||
1813 | ret = tun_attach(tun, file); | ||
1796 | } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { | 1814 | } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { |
1797 | tun = rcu_dereference_protected(tfile->tun, | 1815 | tun = rtnl_dereference(tfile->tun); |
1798 | lockdep_rtnl_is_held()); | ||
1799 | if (!tun || !(tun->flags & TUN_TAP_MQ)) | 1816 | if (!tun || !(tun->flags & TUN_TAP_MQ)) |
1800 | ret = -EINVAL; | 1817 | ret = -EINVAL; |
1801 | else | 1818 | else |
@@ -1803,6 +1820,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) | |||
1803 | } else | 1820 | } else |
1804 | ret = -EINVAL; | 1821 | ret = -EINVAL; |
1805 | 1822 | ||
1823 | unlock: | ||
1806 | rtnl_unlock(); | 1824 | rtnl_unlock(); |
1807 | return ret; | 1825 | return ret; |
1808 | } | 1826 | } |
@@ -1880,10 +1898,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, | |||
1880 | /* Disable/Enable persist mode. Keep an extra reference to the | 1898 | /* Disable/Enable persist mode. Keep an extra reference to the |
1881 | * module to prevent the module being unprobed. | 1899 | * module to prevent the module being unprobed. |
1882 | */ | 1900 | */ |
1883 | if (arg) { | 1901 | if (arg && !(tun->flags & TUN_PERSIST)) { |
1884 | tun->flags |= TUN_PERSIST; | 1902 | tun->flags |= TUN_PERSIST; |
1885 | __module_get(THIS_MODULE); | 1903 | __module_get(THIS_MODULE); |
1886 | } else { | 1904 | } |
1905 | if (!arg && (tun->flags & TUN_PERSIST)) { | ||
1887 | tun->flags &= ~TUN_PERSIST; | 1906 | tun->flags &= ~TUN_PERSIST; |
1888 | module_put(THIS_MODULE); | 1907 | module_put(THIS_MODULE); |
1889 | } | 1908 | } |