diff options
-rw-r--r-- | drivers/net/tun.c | 138 | ||||
-rw-r--r-- | drivers/vhost/net.c | 16 | ||||
-rw-r--r-- | include/linux/net.h | 1 |
3 files changed, 146 insertions, 9 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4884802e0af1..74752159ec34 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <net/sock.h> | 71 | #include <net/sock.h> |
72 | #include <linux/seq_file.h> | 72 | #include <linux/seq_file.h> |
73 | #include <linux/uio.h> | 73 | #include <linux/uio.h> |
74 | #include <linux/skb_array.h> | ||
74 | 75 | ||
75 | #include <asm/uaccess.h> | 76 | #include <asm/uaccess.h> |
76 | 77 | ||
@@ -167,6 +168,7 @@ struct tun_file { | |||
167 | }; | 168 | }; |
168 | struct list_head next; | 169 | struct list_head next; |
169 | struct tun_struct *detached; | 170 | struct tun_struct *detached; |
171 | struct skb_array tx_array; | ||
170 | }; | 172 | }; |
171 | 173 | ||
172 | struct tun_flow_entry { | 174 | struct tun_flow_entry { |
@@ -515,7 +517,11 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) | |||
515 | 517 | ||
/*
 * Drop everything still queued on @tfile.
 * Post-patch (right column): consume each skb from tfile->tx_array and
 * kfree_skb() it, then purge the socket's error queue. The pre-patch code
 * (left column) purged sk_receive_queue instead of the array.
 */
516 | static void tun_queue_purge(struct tun_file *tfile) | 518 | static void tun_queue_purge(struct tun_file *tfile) |
517 | { | 519 | { |
518 | skb_queue_purge(&tfile->sk.sk_receive_queue); | 520 | struct sk_buff *skb; |
521 | |||
522 | while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) | ||
523 | kfree_skb(skb); | ||
524 | |||
519 | skb_queue_purge(&tfile->sk.sk_error_queue); | 525 | skb_queue_purge(&tfile->sk.sk_error_queue); |
520 | } | 526 | } |
521 | 527 | ||
@@ -560,6 +566,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) | |||
560 | tun->dev->reg_state == NETREG_REGISTERED) | 566 | tun->dev->reg_state == NETREG_REGISTERED) |
561 | unregister_netdevice(tun->dev); | 567 | unregister_netdevice(tun->dev); |
562 | } | 568 | } |
569 | if (tun) | ||
570 | skb_array_cleanup(&tfile->tx_array); | ||
563 | sock_put(&tfile->sk); | 571 | sock_put(&tfile->sk); |
564 | } | 572 | } |
565 | } | 573 | } |
@@ -613,6 +621,7 @@ static void tun_detach_all(struct net_device *dev) | |||
613 | static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) | 621 | static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) |
614 | { | 622 | { |
615 | struct tun_file *tfile = file->private_data; | 623 | struct tun_file *tfile = file->private_data; |
624 | struct net_device *dev = tun->dev; | ||
616 | int err; | 625 | int err; |
617 | 626 | ||
618 | err = security_tun_dev_attach(tfile->socket.sk, tun->security); | 627 | err = security_tun_dev_attach(tfile->socket.sk, tun->security); |
@@ -642,6 +651,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte | |||
642 | if (!err) | 651 | if (!err) |
643 | goto out; | 652 | goto out; |
644 | } | 653 | } |
654 | |||
655 | if (!tfile->detached && | ||
656 | skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { | ||
657 | err = -ENOMEM; | ||
658 | goto out; | ||
659 | } | ||
660 | |||
645 | tfile->queue_index = tun->numqueues; | 661 | tfile->queue_index = tun->numqueues; |
646 | tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; | 662 | tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; |
647 | rcu_assign_pointer(tfile->tun, tun); | 663 | rcu_assign_pointer(tfile->tun, tun); |
@@ -891,8 +907,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) | |||
891 | 907 | ||
892 | nf_reset(skb); | 908 | nf_reset(skb); |
893 | 909 | ||
894 | /* Enqueue packet */ | 910 | if (skb_array_produce(&tfile->tx_array, skb)) |
895 | skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); | 911 | goto drop; |
896 | 912 | ||
897 | /* Notify and wake up reader process */ | 913 | /* Notify and wake up reader process */ |
898 | if (tfile->flags & TUN_FASYNC) | 914 | if (tfile->flags & TUN_FASYNC) |
@@ -1107,7 +1123,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) | |||
1107 | 1123 | ||
1108 | poll_wait(file, sk_sleep(sk), wait); | 1124 | poll_wait(file, sk_sleep(sk), wait); |
1109 | 1125 | ||
1110 | if (!skb_queue_empty(&sk->sk_receive_queue)) | 1126 | if (!skb_array_empty(&tfile->tx_array)) |
1111 | mask |= POLLIN | POLLRDNORM; | 1127 | mask |= POLLIN | POLLRDNORM; |
1112 | 1128 | ||
1113 | if (sock_writeable(sk) || | 1129 | if (sock_writeable(sk) || |
@@ -1426,22 +1442,61 @@ done: | |||
1426 | return total; | 1442 | return total; |
1427 | } | 1443 | } |
1428 | 1444 | ||
1445 | static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, | ||
1446 | int *err) | ||
1447 | { | ||
1448 | DECLARE_WAITQUEUE(wait, current); | ||
1449 | struct sk_buff *skb = NULL; | ||
1450 | |||
1451 | skb = skb_array_consume(&tfile->tx_array); | ||
1452 | if (skb) | ||
1453 | goto out; | ||
1454 | if (noblock) { | ||
1455 | *err = -EAGAIN; | ||
1456 | goto out; | ||
1457 | } | ||
1458 | |||
1459 | add_wait_queue(&tfile->wq.wait, &wait); | ||
1460 | current->state = TASK_INTERRUPTIBLE; | ||
1461 | |||
1462 | while (1) { | ||
1463 | skb = skb_array_consume(&tfile->tx_array); | ||
1464 | if (skb) | ||
1465 | break; | ||
1466 | if (signal_pending(current)) { | ||
1467 | *err = -ERESTARTSYS; | ||
1468 | break; | ||
1469 | } | ||
1470 | if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) { | ||
1471 | *err = -EFAULT; | ||
1472 | break; | ||
1473 | } | ||
1474 | |||
1475 | schedule(); | ||
1476 | } | ||
1477 | |||
1478 | current->state = TASK_RUNNING; | ||
1479 | remove_wait_queue(&tfile->wq.wait, &wait); | ||
1480 | |||
1481 | out: | ||
1482 | return skb; | ||
1483 | } | ||
1484 | |||
1429 | static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, | 1485 | static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, |
1430 | struct iov_iter *to, | 1486 | struct iov_iter *to, |
1431 | int noblock) | 1487 | int noblock) |
1432 | { | 1488 | { |
1433 | struct sk_buff *skb; | 1489 | struct sk_buff *skb; |
1434 | ssize_t ret; | 1490 | ssize_t ret; |
1435 | int peeked, err, off = 0; | 1491 | int err; |
1436 | 1492 | ||
1437 | tun_debug(KERN_INFO, tun, "tun_do_read\n"); | 1493 | tun_debug(KERN_INFO, tun, "tun_do_read\n"); |
1438 | 1494 | ||
1439 | if (!iov_iter_count(to)) | 1495 | if (!iov_iter_count(to)) |
1440 | return 0; | 1496 | return 0; |
1441 | 1497 | ||
1442 | /* Read frames from queue */ | 1498 | /* Read frames from ring */ |
1443 | skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, | 1499 | skb = tun_ring_recv(tfile, noblock, &err); |
1444 | &peeked, &off, &err); | ||
1445 | if (!skb) | 1500 | if (!skb) |
1446 | return err; | 1501 | return err; |
1447 | 1502 | ||
@@ -1574,8 +1629,25 @@ out: | |||
1574 | return ret; | 1629 | return ret; |
1575 | } | 1630 | } |
1576 | 1631 | ||
1632 | static int tun_peek_len(struct socket *sock) | ||
1633 | { | ||
1634 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); | ||
1635 | struct tun_struct *tun; | ||
1636 | int ret = 0; | ||
1637 | |||
1638 | tun = __tun_get(tfile); | ||
1639 | if (!tun) | ||
1640 | return 0; | ||
1641 | |||
1642 | ret = skb_array_peek_len(&tfile->tx_array); | ||
1643 | tun_put(tun); | ||
1644 | |||
1645 | return ret; | ||
1646 | } | ||
1647 | |||
1577 | /* Ops structure to mimic raw sockets with tun */ | 1648 | /* Ops structure to mimic raw sockets with tun */ |
/*
 * The patch adds .peek_len = tun_peek_len (right column only); .sendmsg and
 * .recvmsg are unchanged. vhost-net's peek_head_len() and sk_has_rx_data()
 * call ->peek_len whenever it is set, instead of looking at
 * sk_receive_queue.
 */
1578 | static const struct proto_ops tun_socket_ops = { | 1649 | static const struct proto_ops tun_socket_ops = { |
1650 | .peek_len = tun_peek_len, | ||
1579 | .sendmsg = tun_sendmsg, | 1651 | .sendmsg = tun_sendmsg, |
1580 | .recvmsg = tun_recvmsg, | 1652 | .recvmsg = tun_recvmsg, |
1581 | }; | 1653 | }; |
@@ -2397,6 +2469,53 @@ static const struct ethtool_ops tun_ethtool_ops = { | |||
2397 | .get_ts_info = ethtool_op_get_ts_info, | 2469 | .get_ts_info = ethtool_op_get_ts_info, |
2398 | }; | 2470 | }; |
2399 | 2471 | ||
2472 | static int tun_queue_resize(struct tun_struct *tun) | ||
2473 | { | ||
2474 | struct net_device *dev = tun->dev; | ||
2475 | struct tun_file *tfile; | ||
2476 | struct skb_array **arrays; | ||
2477 | int n = tun->numqueues + tun->numdisabled; | ||
2478 | int ret, i; | ||
2479 | |||
2480 | arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL); | ||
2481 | if (!arrays) | ||
2482 | return -ENOMEM; | ||
2483 | |||
2484 | for (i = 0; i < tun->numqueues; i++) { | ||
2485 | tfile = rtnl_dereference(tun->tfiles[i]); | ||
2486 | arrays[i] = &tfile->tx_array; | ||
2487 | } | ||
2488 | list_for_each_entry(tfile, &tun->disabled, next) | ||
2489 | arrays[i++] = &tfile->tx_array; | ||
2490 | |||
2491 | ret = skb_array_resize_multiple(arrays, n, | ||
2492 | dev->tx_queue_len, GFP_KERNEL); | ||
2493 | |||
2494 | kfree(arrays); | ||
2495 | return ret; | ||
2496 | } | ||
2497 | |||
2498 | static int tun_device_event(struct notifier_block *unused, | ||
2499 | unsigned long event, void *ptr) | ||
2500 | { | ||
2501 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | ||
2502 | struct tun_struct *tun = netdev_priv(dev); | ||
2503 | |||
2504 | switch (event) { | ||
2505 | case NETDEV_CHANGE_TX_QUEUE_LEN: | ||
2506 | if (tun_queue_resize(tun)) | ||
2507 | return NOTIFY_BAD; | ||
2508 | break; | ||
2509 | default: | ||
2510 | break; | ||
2511 | } | ||
2512 | |||
2513 | return NOTIFY_DONE; | ||
2514 | } | ||
2515 | |||
2516 | static struct notifier_block tun_notifier_block __read_mostly = { | ||
2517 | .notifier_call = tun_device_event, | ||
2518 | }; | ||
2400 | 2519 | ||
2401 | static int __init tun_init(void) | 2520 | static int __init tun_init(void) |
2402 | { | 2521 | { |
@@ -2416,6 +2535,8 @@ static int __init tun_init(void) | |||
2416 | pr_err("Can't register misc device %d\n", TUN_MINOR); | 2535 | pr_err("Can't register misc device %d\n", TUN_MINOR); |
2417 | goto err_misc; | 2536 | goto err_misc; |
2418 | } | 2537 | } |
2538 | |||
2539 | register_netdevice_notifier(&tun_notifier_block); | ||
2419 | return 0; | 2540 | return 0; |
2420 | err_misc: | 2541 | err_misc: |
2421 | rtnl_link_unregister(&tun_link_ops); | 2542 | rtnl_link_unregister(&tun_link_ops); |
@@ -2427,6 +2548,7 @@ static void tun_cleanup(void) | |||
2427 | { | 2548 | { |
2428 | misc_deregister(&tun_miscdev); | 2549 | misc_deregister(&tun_miscdev); |
2429 | rtnl_link_unregister(&tun_link_ops); | 2550 | rtnl_link_unregister(&tun_link_ops); |
2551 | unregister_netdevice_notifier(&tun_notifier_block); | ||
2430 | } | 2552 | } |
2431 | 2553 | ||
2432 | /* Get an underlying socket object from tun file. Returns error unless file is | 2554 | /* Get an underlying socket object from tun file. Returns error unless file is |
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1d3e45f84549..e032ca397371 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -481,10 +481,14 @@ out: | |||
481 | 481 | ||
482 | static int peek_head_len(struct sock *sk) | 482 | static int peek_head_len(struct sock *sk) |
483 | { | 483 | { |
484 | struct socket *sock = sk->sk_socket; | ||
484 | struct sk_buff *head; | 485 | struct sk_buff *head; |
485 | int len = 0; | 486 | int len = 0; |
486 | unsigned long flags; | 487 | unsigned long flags; |
487 | 488 | ||
489 | if (sock->ops->peek_len) | ||
490 | return sock->ops->peek_len(sock); | ||
491 | |||
488 | spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); | 492 | spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); |
489 | head = skb_peek(&sk->sk_receive_queue); | 493 | head = skb_peek(&sk->sk_receive_queue); |
490 | if (likely(head)) { | 494 | if (likely(head)) { |
@@ -497,6 +501,16 @@ static int peek_head_len(struct sock *sk) | |||
497 | return len; | 501 | return len; |
498 | } | 502 | } |
499 | 503 | ||
504 | static int sk_has_rx_data(struct sock *sk) | ||
505 | { | ||
506 | struct socket *sock = sk->sk_socket; | ||
507 | |||
508 | if (sock->ops->peek_len) | ||
509 | return sock->ops->peek_len(sock); | ||
510 | |||
511 | return skb_queue_empty(&sk->sk_receive_queue); | ||
512 | } | ||
513 | |||
500 | static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) | 514 | static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) |
501 | { | 515 | { |
502 | struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; | 516 | struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; |
@@ -513,7 +527,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) | |||
513 | endtime = busy_clock() + vq->busyloop_timeout; | 527 | endtime = busy_clock() + vq->busyloop_timeout; |
514 | 528 | ||
515 | while (vhost_can_busy_poll(&net->dev, endtime) && | 529 | while (vhost_can_busy_poll(&net->dev, endtime) && |
516 | skb_queue_empty(&sk->sk_receive_queue) && | 530 | !sk_has_rx_data(sk) && |
517 | vhost_vq_avail_empty(&net->dev, vq)) | 531 | vhost_vq_avail_empty(&net->dev, vq)) |
518 | cpu_relax_lowlatency(); | 532 | cpu_relax_lowlatency(); |
519 | 533 | ||
diff --git a/include/linux/net.h b/include/linux/net.h index 25aa03b51c4e..b9f0ff4d489c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -185,6 +185,7 @@ struct proto_ops { | |||
185 | ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, | 185 | ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, |
186 | struct pipe_inode_info *pipe, size_t len, unsigned int flags); | 186 | struct pipe_inode_info *pipe, size_t len, unsigned int flags); |
187 | int (*set_peek_off)(struct sock *sk, int val); | 187 | int (*set_peek_off)(struct sock *sk, int val); |
188 | int (*peek_len)(struct socket *sock); | ||
188 | }; | 189 | }; |
189 | 190 | ||
190 | #define DECLARE_SOCKADDR(type, dst, src) \ | 191 | #define DECLARE_SOCKADDR(type, dst, src) \ |