diff options
author | Arnd Bergmann <arnd@arndb.de> | 2010-02-18 00:46:50 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-02-18 17:08:38 -0500 |
commit | 501c774cb13c3ef8fb7fc5f08fa19473f7d9a0db (patch) | |
tree | 65db25ce76a5c038d69c304d9b6456e4f68e9f47 /drivers | |
parent | 02df55d28c6001a3cdb7a997a34a0b01f01d015e (diff) |
net/macvtap: add vhost support
This adds support for passing a macvtap file descriptor into
vhost-net, much like we already do for tun/tap.
Most of the new code is taken from the respective patch
in the tun driver and may get consolidated in the future.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/macvtap.c | 98 | ||||
-rw-r--r-- | drivers/vhost/Kconfig | 2 | ||||
-rw-r--r-- | drivers/vhost/net.c | 8 |
3 files changed, 84 insertions, 24 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 705099749766..e354501ab297 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -58,6 +58,8 @@ static unsigned int macvtap_major; | |||
58 | static struct class *macvtap_class; | 58 | static struct class *macvtap_class; |
59 | static struct cdev macvtap_cdev; | 59 | static struct cdev macvtap_cdev; |
60 | 60 | ||
61 | static const struct proto_ops macvtap_socket_ops; | ||
62 | |||
61 | /* | 63 | /* |
62 | * RCU usage: | 64 | * RCU usage: |
63 | * The macvtap_queue and the macvlan_dev are loosely coupled, the | 65 | * The macvtap_queue and the macvlan_dev are loosely coupled, the |
@@ -176,7 +178,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) | |||
176 | return -ENOLINK; | 178 | return -ENOLINK; |
177 | 179 | ||
178 | skb_queue_tail(&q->sk.sk_receive_queue, skb); | 180 | skb_queue_tail(&q->sk.sk_receive_queue, skb); |
179 | wake_up(q->sk.sk_sleep); | 181 | wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); |
180 | return 0; | 182 | return 0; |
181 | } | 183 | } |
182 | 184 | ||
@@ -242,7 +244,7 @@ static void macvtap_sock_write_space(struct sock *sk) | |||
242 | return; | 244 | return; |
243 | 245 | ||
244 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 246 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
245 | wake_up_interruptible_sync(sk->sk_sleep); | 247 | wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); |
246 | } | 248 | } |
247 | 249 | ||
248 | static int macvtap_open(struct inode *inode, struct file *file) | 250 | static int macvtap_open(struct inode *inode, struct file *file) |
@@ -270,6 +272,8 @@ static int macvtap_open(struct inode *inode, struct file *file) | |||
270 | init_waitqueue_head(&q->sock.wait); | 272 | init_waitqueue_head(&q->sock.wait); |
271 | q->sock.type = SOCK_RAW; | 273 | q->sock.type = SOCK_RAW; |
272 | q->sock.state = SS_CONNECTED; | 274 | q->sock.state = SS_CONNECTED; |
275 | q->sock.file = file; | ||
276 | q->sock.ops = &macvtap_socket_ops; | ||
273 | sock_init_data(&q->sock, &q->sk); | 277 | sock_init_data(&q->sock, &q->sk); |
274 | q->sk.sk_write_space = macvtap_sock_write_space; | 278 | q->sk.sk_write_space = macvtap_sock_write_space; |
275 | 279 | ||
@@ -387,32 +391,20 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, | |||
387 | 391 | ||
388 | rcu_read_lock_bh(); | 392 | rcu_read_lock_bh(); |
389 | vlan = rcu_dereference(q->vlan); | 393 | vlan = rcu_dereference(q->vlan); |
390 | macvlan_count_rx(vlan, len, ret == 0, 0); | 394 | if (vlan) |
395 | macvlan_count_rx(vlan, len, ret == 0, 0); | ||
391 | rcu_read_unlock_bh(); | 396 | rcu_read_unlock_bh(); |
392 | 397 | ||
393 | return ret ? ret : len; | 398 | return ret ? ret : len; |
394 | } | 399 | } |
395 | 400 | ||
396 | static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, | 401 | static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, |
397 | unsigned long count, loff_t pos) | 402 | const struct iovec *iv, unsigned long len, |
403 | int noblock) | ||
398 | { | 404 | { |
399 | struct file *file = iocb->ki_filp; | ||
400 | struct macvtap_queue *q = file->private_data; | ||
401 | |||
402 | DECLARE_WAITQUEUE(wait, current); | 405 | DECLARE_WAITQUEUE(wait, current); |
403 | struct sk_buff *skb; | 406 | struct sk_buff *skb; |
404 | ssize_t len, ret = 0; | 407 | ssize_t ret = 0; |
405 | |||
406 | if (!q) { | ||
407 | ret = -ENOLINK; | ||
408 | goto out; | ||
409 | } | ||
410 | |||
411 | len = iov_length(iv, count); | ||
412 | if (len < 0) { | ||
413 | ret = -EINVAL; | ||
414 | goto out; | ||
415 | } | ||
416 | 408 | ||
417 | add_wait_queue(q->sk.sk_sleep, &wait); | 409 | add_wait_queue(q->sk.sk_sleep, &wait); |
418 | while (len) { | 410 | while (len) { |
@@ -421,7 +413,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, | |||
421 | /* Read frames from the queue */ | 413 | /* Read frames from the queue */ |
422 | skb = skb_dequeue(&q->sk.sk_receive_queue); | 414 | skb = skb_dequeue(&q->sk.sk_receive_queue); |
423 | if (!skb) { | 415 | if (!skb) { |
424 | if (file->f_flags & O_NONBLOCK) { | 416 | if (noblock) { |
425 | ret = -EAGAIN; | 417 | ret = -EAGAIN; |
426 | break; | 418 | break; |
427 | } | 419 | } |
@@ -440,7 +432,24 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, | |||
440 | 432 | ||
441 | current->state = TASK_RUNNING; | 433 | current->state = TASK_RUNNING; |
442 | remove_wait_queue(q->sk.sk_sleep, &wait); | 434 | remove_wait_queue(q->sk.sk_sleep, &wait); |
435 | return ret; | ||
436 | } | ||
437 | |||
438 | static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, | ||
439 | unsigned long count, loff_t pos) | ||
440 | { | ||
441 | struct file *file = iocb->ki_filp; | ||
442 | struct macvtap_queue *q = file->private_data; | ||
443 | ssize_t len, ret = 0; | ||
443 | 444 | ||
445 | len = iov_length(iv, count); | ||
446 | if (len < 0) { | ||
447 | ret = -EINVAL; | ||
448 | goto out; | ||
449 | } | ||
450 | |||
451 | ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); | ||
452 | ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ | ||
444 | out: | 453 | out: |
445 | return ret; | 454 | return ret; |
446 | } | 455 | } |
@@ -538,6 +547,53 @@ static const struct file_operations macvtap_fops = { | |||
538 | #endif | 547 | #endif |
539 | }; | 548 | }; |
540 | 549 | ||
550 | static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, | ||
551 | struct msghdr *m, size_t total_len) | ||
552 | { | ||
553 | struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); | ||
554 | return macvtap_get_user(q, m->msg_iov, total_len, | ||
555 | m->msg_flags & MSG_DONTWAIT); | ||
556 | } | ||
557 | |||
558 | static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, | ||
559 | struct msghdr *m, size_t total_len, | ||
560 | int flags) | ||
561 | { | ||
562 | struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); | ||
563 | int ret; | ||
564 | if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) | ||
565 | return -EINVAL; | ||
566 | ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, | ||
567 | flags & MSG_DONTWAIT); | ||
568 | if (ret > total_len) { | ||
569 | m->msg_flags |= MSG_TRUNC; | ||
570 | ret = flags & MSG_TRUNC ? ret : total_len; | ||
571 | } | ||
572 | return ret; | ||
573 | } | ||
574 | |||
575 | /* Ops structure to mimic raw sockets with macvtap */ | ||
576 | static const struct proto_ops macvtap_socket_ops = { | ||
577 | .sendmsg = macvtap_sendmsg, | ||
578 | .recvmsg = macvtap_recvmsg, | ||
579 | }; | ||
580 | |||
581 | /* Get an underlying socket object from a macvtap file. Returns error unless file is | ||
582 | * attached to a device. The returned object works like a packet socket, it | ||
583 | * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for | ||
584 | * holding a reference to the file for as long as the socket is in use. */ | ||
585 | struct socket *macvtap_get_socket(struct file *file) | ||
586 | { | ||
587 | struct macvtap_queue *q; | ||
588 | if (file->f_op != &macvtap_fops) | ||
589 | return ERR_PTR(-EINVAL); | ||
590 | q = file->private_data; | ||
591 | if (!q) | ||
592 | return ERR_PTR(-EBADFD); | ||
593 | return &q->sock; | ||
594 | } | ||
595 | EXPORT_SYMBOL_GPL(macvtap_get_socket); | ||
596 | |||
541 | static int macvtap_init(void) | 597 | static int macvtap_init(void) |
542 | { | 598 | { |
543 | int err; | 599 | int err; |
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 9e9355367bb3..e4e2fd1b5107 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config VHOST_NET | 1 | config VHOST_NET |
2 | tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" | 2 | tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" |
3 | depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL | 3 | depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) && EXPERIMENTAL |
4 | ---help--- | 4 | ---help--- |
5 | This kernel module can be loaded in host kernel to accelerate | 5 | This kernel module can be loaded in host kernel to accelerate |
6 | guest networking with virtio_net. Not to be confused with virtio_net | 6 | guest networking with virtio_net. Not to be confused with virtio_net |
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4c8928319e1d..91a324cc2298 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/if_packet.h> | 22 | #include <linux/if_packet.h> |
23 | #include <linux/if_arp.h> | 23 | #include <linux/if_arp.h> |
24 | #include <linux/if_tun.h> | 24 | #include <linux/if_tun.h> |
25 | #include <linux/if_macvlan.h> | ||
25 | 26 | ||
26 | #include <net/sock.h> | 27 | #include <net/sock.h> |
27 | 28 | ||
@@ -452,13 +453,16 @@ err: | |||
452 | return ERR_PTR(r); | 453 | return ERR_PTR(r); |
453 | } | 454 | } |
454 | 455 | ||
455 | static struct socket *get_tun_socket(int fd) | 456 | static struct socket *get_tap_socket(int fd) |
456 | { | 457 | { |
457 | struct file *file = fget(fd); | 458 | struct file *file = fget(fd); |
458 | struct socket *sock; | 459 | struct socket *sock; |
459 | if (!file) | 460 | if (!file) |
460 | return ERR_PTR(-EBADF); | 461 | return ERR_PTR(-EBADF); |
461 | sock = tun_get_socket(file); | 462 | sock = tun_get_socket(file); |
463 | if (!IS_ERR(sock)) | ||
464 | return sock; | ||
465 | sock = macvtap_get_socket(file); | ||
462 | if (IS_ERR(sock)) | 466 | if (IS_ERR(sock)) |
463 | fput(file); | 467 | fput(file); |
464 | return sock; | 468 | return sock; |
@@ -473,7 +477,7 @@ static struct socket *get_socket(int fd) | |||
473 | sock = get_raw_socket(fd); | 477 | sock = get_raw_socket(fd); |
474 | if (!IS_ERR(sock)) | 478 | if (!IS_ERR(sock)) |
475 | return sock; | 479 | return sock; |
476 | sock = get_tun_socket(fd); | 480 | sock = get_tap_socket(fd); |
477 | if (!IS_ERR(sock)) | 481 | if (!IS_ERR(sock)) |
478 | return sock; | 482 | return sock; |
479 | return ERR_PTR(-ENOTSOCK); | 483 | return ERR_PTR(-ENOTSOCK); |