aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- drivers/net/tun.c 138
-rw-r--r-- drivers/vhost/net.c 16
-rw-r--r-- include/linux/net.h 1
3 files changed, 146 insertions, 9 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4884802e0af1..74752159ec34 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -71,6 +71,7 @@
71#include <net/sock.h> 71#include <net/sock.h>
72#include <linux/seq_file.h> 72#include <linux/seq_file.h>
73#include <linux/uio.h> 73#include <linux/uio.h>
74#include <linux/skb_array.h>
74 75
75#include <asm/uaccess.h> 76#include <asm/uaccess.h>
76 77
@@ -167,6 +168,7 @@ struct tun_file {
167 }; 168 };
168 struct list_head next; 169 struct list_head next;
169 struct tun_struct *detached; 170 struct tun_struct *detached;
171 struct skb_array tx_array;
170}; 172};
171 173
172struct tun_flow_entry { 174struct tun_flow_entry {
@@ -515,7 +517,11 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
515 517
516static void tun_queue_purge(struct tun_file *tfile) 518static void tun_queue_purge(struct tun_file *tfile)
517{ 519{
518 skb_queue_purge(&tfile->sk.sk_receive_queue); 520 struct sk_buff *skb;
521
522 while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
523 kfree_skb(skb);
524
519 skb_queue_purge(&tfile->sk.sk_error_queue); 525 skb_queue_purge(&tfile->sk.sk_error_queue);
520} 526}
521 527
@@ -560,6 +566,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
560 tun->dev->reg_state == NETREG_REGISTERED) 566 tun->dev->reg_state == NETREG_REGISTERED)
561 unregister_netdevice(tun->dev); 567 unregister_netdevice(tun->dev);
562 } 568 }
569 if (tun)
570 skb_array_cleanup(&tfile->tx_array);
563 sock_put(&tfile->sk); 571 sock_put(&tfile->sk);
564 } 572 }
565} 573}
@@ -613,6 +621,7 @@ static void tun_detach_all(struct net_device *dev)
613static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) 621static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
614{ 622{
615 struct tun_file *tfile = file->private_data; 623 struct tun_file *tfile = file->private_data;
624 struct net_device *dev = tun->dev;
616 int err; 625 int err;
617 626
618 err = security_tun_dev_attach(tfile->socket.sk, tun->security); 627 err = security_tun_dev_attach(tfile->socket.sk, tun->security);
@@ -642,6 +651,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
642 if (!err) 651 if (!err)
643 goto out; 652 goto out;
644 } 653 }
654
655 if (!tfile->detached &&
656 skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
657 err = -ENOMEM;
658 goto out;
659 }
660
645 tfile->queue_index = tun->numqueues; 661 tfile->queue_index = tun->numqueues;
646 tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; 662 tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
647 rcu_assign_pointer(tfile->tun, tun); 663 rcu_assign_pointer(tfile->tun, tun);
@@ -891,8 +907,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
891 907
892 nf_reset(skb); 908 nf_reset(skb);
893 909
894 /* Enqueue packet */ 910 if (skb_array_produce(&tfile->tx_array, skb))
895 skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); 911 goto drop;
896 912
897 /* Notify and wake up reader process */ 913 /* Notify and wake up reader process */
898 if (tfile->flags & TUN_FASYNC) 914 if (tfile->flags & TUN_FASYNC)
@@ -1107,7 +1123,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
1107 1123
1108 poll_wait(file, sk_sleep(sk), wait); 1124 poll_wait(file, sk_sleep(sk), wait);
1109 1125
1110 if (!skb_queue_empty(&sk->sk_receive_queue)) 1126 if (!skb_array_empty(&tfile->tx_array))
1111 mask |= POLLIN | POLLRDNORM; 1127 mask |= POLLIN | POLLRDNORM;
1112 1128
1113 if (sock_writeable(sk) || 1129 if (sock_writeable(sk) ||
@@ -1426,22 +1442,61 @@ done:
1426 return total; 1442 return total;
1427} 1443}
1428 1444
1445static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
1446 int *err)
1447{
1448 DECLARE_WAITQUEUE(wait, current);
1449 struct sk_buff *skb = NULL;
1450
1451 skb = skb_array_consume(&tfile->tx_array);
1452 if (skb)
1453 goto out;
1454 if (noblock) {
1455 *err = -EAGAIN;
1456 goto out;
1457 }
1458
1459 add_wait_queue(&tfile->wq.wait, &wait);
1460 current->state = TASK_INTERRUPTIBLE;
1461
1462 while (1) {
1463 skb = skb_array_consume(&tfile->tx_array);
1464 if (skb)
1465 break;
1466 if (signal_pending(current)) {
1467 *err = -ERESTARTSYS;
1468 break;
1469 }
1470 if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) {
1471 *err = -EFAULT;
1472 break;
1473 }
1474
1475 schedule();
1476 }
1477
1478 current->state = TASK_RUNNING;
1479 remove_wait_queue(&tfile->wq.wait, &wait);
1480
1481out:
1482 return skb;
1483}
1484
1429static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, 1485static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
1430 struct iov_iter *to, 1486 struct iov_iter *to,
1431 int noblock) 1487 int noblock)
1432{ 1488{
1433 struct sk_buff *skb; 1489 struct sk_buff *skb;
1434 ssize_t ret; 1490 ssize_t ret;
1435 int peeked, err, off = 0; 1491 int err;
1436 1492
1437 tun_debug(KERN_INFO, tun, "tun_do_read\n"); 1493 tun_debug(KERN_INFO, tun, "tun_do_read\n");
1438 1494
1439 if (!iov_iter_count(to)) 1495 if (!iov_iter_count(to))
1440 return 0; 1496 return 0;
1441 1497
1442 /* Read frames from queue */ 1498 /* Read frames from ring */
1443 skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, 1499 skb = tun_ring_recv(tfile, noblock, &err);
1444 &peeked, &off, &err);
1445 if (!skb) 1500 if (!skb)
1446 return err; 1501 return err;
1447 1502
@@ -1574,8 +1629,25 @@ out:
1574 return ret; 1629 return ret;
1575} 1630}
1576 1631
1632static int tun_peek_len(struct socket *sock)
1633{
1634 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
1635 struct tun_struct *tun;
1636 int ret = 0;
1637
1638 tun = __tun_get(tfile);
1639 if (!tun)
1640 return 0;
1641
1642 ret = skb_array_peek_len(&tfile->tx_array);
1643 tun_put(tun);
1644
1645 return ret;
1646}
1647
1577/* Ops structure to mimic raw sockets with tun */ 1648/* Ops structure to mimic raw sockets with tun */
1578static const struct proto_ops tun_socket_ops = { 1649static const struct proto_ops tun_socket_ops = {
1650 .peek_len = tun_peek_len,
1579 .sendmsg = tun_sendmsg, 1651 .sendmsg = tun_sendmsg,
1580 .recvmsg = tun_recvmsg, 1652 .recvmsg = tun_recvmsg,
1581}; 1653};
@@ -2397,6 +2469,53 @@ static const struct ethtool_ops tun_ethtool_ops = {
2397 .get_ts_info = ethtool_op_get_ts_info, 2469 .get_ts_info = ethtool_op_get_ts_info,
2398}; 2470};
2399 2471
2472static int tun_queue_resize(struct tun_struct *tun)
2473{
2474 struct net_device *dev = tun->dev;
2475 struct tun_file *tfile;
2476 struct skb_array **arrays;
2477 int n = tun->numqueues + tun->numdisabled;
2478 int ret, i;
2479
2480 arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
2481 if (!arrays)
2482 return -ENOMEM;
2483
2484 for (i = 0; i < tun->numqueues; i++) {
2485 tfile = rtnl_dereference(tun->tfiles[i]);
2486 arrays[i] = &tfile->tx_array;
2487 }
2488 list_for_each_entry(tfile, &tun->disabled, next)
2489 arrays[i++] = &tfile->tx_array;
2490
2491 ret = skb_array_resize_multiple(arrays, n,
2492 dev->tx_queue_len, GFP_KERNEL);
2493
2494 kfree(arrays);
2495 return ret;
2496}
2497
2498static int tun_device_event(struct notifier_block *unused,
2499 unsigned long event, void *ptr)
2500{
2501 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2502 struct tun_struct *tun = netdev_priv(dev);
2503
2504 switch (event) {
2505 case NETDEV_CHANGE_TX_QUEUE_LEN:
2506 if (tun_queue_resize(tun))
2507 return NOTIFY_BAD;
2508 break;
2509 default:
2510 break;
2511 }
2512
2513 return NOTIFY_DONE;
2514}
2515
2516static struct notifier_block tun_notifier_block __read_mostly = {
2517 .notifier_call = tun_device_event,
2518};
2400 2519
2401static int __init tun_init(void) 2520static int __init tun_init(void)
2402{ 2521{
@@ -2416,6 +2535,8 @@ static int __init tun_init(void)
2416 pr_err("Can't register misc device %d\n", TUN_MINOR); 2535 pr_err("Can't register misc device %d\n", TUN_MINOR);
2417 goto err_misc; 2536 goto err_misc;
2418 } 2537 }
2538
2539 register_netdevice_notifier(&tun_notifier_block);
2419 return 0; 2540 return 0;
2420err_misc: 2541err_misc:
2421 rtnl_link_unregister(&tun_link_ops); 2542 rtnl_link_unregister(&tun_link_ops);
@@ -2427,6 +2548,7 @@ static void tun_cleanup(void)
2427{ 2548{
2428 misc_deregister(&tun_miscdev); 2549 misc_deregister(&tun_miscdev);
2429 rtnl_link_unregister(&tun_link_ops); 2550 rtnl_link_unregister(&tun_link_ops);
2551 unregister_netdevice_notifier(&tun_notifier_block);
2430} 2552}
2431 2553
2432/* Get an underlying socket object from tun file. Returns error unless file is 2554/* Get an underlying socket object from tun file. Returns error unless file is
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1d3e45f84549..e032ca397371 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -481,10 +481,14 @@ out:
481 481
482static int peek_head_len(struct sock *sk) 482static int peek_head_len(struct sock *sk)
483{ 483{
484 struct socket *sock = sk->sk_socket;
484 struct sk_buff *head; 485 struct sk_buff *head;
485 int len = 0; 486 int len = 0;
486 unsigned long flags; 487 unsigned long flags;
487 488
489 if (sock->ops->peek_len)
490 return sock->ops->peek_len(sock);
491
488 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); 492 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
489 head = skb_peek(&sk->sk_receive_queue); 493 head = skb_peek(&sk->sk_receive_queue);
490 if (likely(head)) { 494 if (likely(head)) {
@@ -497,6 +501,16 @@ static int peek_head_len(struct sock *sk)
497 return len; 501 return len;
498} 502}
499 503
504static int sk_has_rx_data(struct sock *sk)
505{
506 struct socket *sock = sk->sk_socket;
507
508 if (sock->ops->peek_len)
509 return sock->ops->peek_len(sock);
510
511 return skb_queue_empty(&sk->sk_receive_queue);
512}
513
500static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) 514static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
501{ 515{
502 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 516 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -513,7 +527,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
513 endtime = busy_clock() + vq->busyloop_timeout; 527 endtime = busy_clock() + vq->busyloop_timeout;
514 528
515 while (vhost_can_busy_poll(&net->dev, endtime) && 529 while (vhost_can_busy_poll(&net->dev, endtime) &&
516 skb_queue_empty(&sk->sk_receive_queue) && 530 !sk_has_rx_data(sk) &&
517 vhost_vq_avail_empty(&net->dev, vq)) 531 vhost_vq_avail_empty(&net->dev, vq))
518 cpu_relax_lowlatency(); 532 cpu_relax_lowlatency();
519 533
diff --git a/include/linux/net.h b/include/linux/net.h
index 25aa03b51c4e..b9f0ff4d489c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -185,6 +185,7 @@ struct proto_ops {
185 ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, 185 ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
186 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 186 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
187 int (*set_peek_off)(struct sock *sk, int val); 187 int (*set_peek_off)(struct sock *sk, int val);
188 int (*peek_len)(struct socket *sock);
188}; 189};
189 190
190#define DECLARE_SOCKADDR(type, dst, src) \ 191#define DECLARE_SOCKADDR(type, dst, src) \