author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /drivers/vhost/net.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r--	drivers/vhost/net.c	201
1 file changed, 49 insertions(+), 152 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7c8008225ee3..e224a92baa16 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -10,7 +10,6 @@
 #include <linux/eventfd.h>
 #include <linux/vhost.h>
 #include <linux/virtio_net.h>
-#include <linux/mmu_context.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -61,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iov_count) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -80,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iovcount) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -127,7 +128,10 @@ static void handle_tx(struct vhost_net *net)
 	size_t len, total_len = 0;
 	int err, wmem;
 	size_t hdr_size;
-	struct socket *sock = rcu_dereference(vq->private_data);
+	struct socket *sock;
+
+	/* TODO: check that we are running from vhost_worker? */
+	sock = rcu_dereference_check(vq->private_data, 1);
 	if (!sock)
 		return;
 
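The switch from rcu_dereference() to rcu_dereference_check(..., 1) quiets lockdep-RCU: handle_tx() runs from the vhost worker thread, which itself acts as the read-side critical section, so there is no rcu_read_lock() for lockdep to observe, and the constant-true condition waives the check (the TODO notes the caller should ideally be verified). A minimal sketch of the annotation pattern with hypothetical names (struct my_vq), not the vhost code itself:

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct my_vq {
	struct mutex mutex;
	void __rcu *private_data;	/* RCU-managed backend pointer */
};

/* Reader on the worker thread: the constant-true condition waives the
 * lockdep check, mirroring the TODO above about verifying the caller. */
static void *worker_get(struct my_vq *vq)
{
	return rcu_dereference_check(vq->private_data, 1);
}

/* Owner path: lockdep verifies that vq->mutex really is held. */
static void *owner_get(struct my_vq *vq)
{
	return rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
}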
@@ -139,9 +143,8 @@ static void handle_tx(struct vhost_net *net)
 		return;
 	}
 
-	use_mm(net->dev.mm);
 	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
+	vhost_disable_notify(&net->dev, vq);
 
 	if (wmem < sock->sk->sk_sndbuf / 2)
 		tx_poll_stop(net);
@@ -163,8 +166,8 @@ static void handle_tx(struct vhost_net *net)
 			set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
 			break;
 		}
-		if (unlikely(vhost_enable_notify(vq))) {
-			vhost_disable_notify(vq);
+		if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+			vhost_disable_notify(&net->dev, vq);
 			continue;
 		}
 		break;
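vhost_enable_notify()/vhost_disable_notify() now take the device as an extra argument, but the surrounding logic is the usual enable-then-recheck dance: a descriptor posted in the window before notifications were re-armed would otherwise sit unprocessed until the next kick. A userspace toy of the pattern (stand-in helpers, not the vhost API):

#include <stdio.h>

static int pending;		/* descriptors posted by the "guest" */
static int notify_enabled;	/* would arm the eventfd kick */

static int enable_notify(void)	/* nonzero: work slipped in while disabled */
{
	notify_enabled = 1;
	return pending > 0;
}

static void disable_notify(void)
{
	notify_enabled = 0;
}

static void drain(void)
{
	for (;;) {
		while (pending > 0)
			pending--;		/* "process" one descriptor */
		if (enable_notify()) {		/* raced with the producer */
			disable_notify();
			continue;		/* drain the late arrival */
		}
		break;				/* quiescent: wait for a kick */
	}
}

int main(void)
{
	pending = 3;
	drain();
	printf("pending after drain: %d, notify %s\n",
	       pending, notify_enabled ? "armed" : "off");
	return 0;
}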
@@ -204,19 +207,19 @@ static void handle_tx(struct vhost_net *net)
 	}
 
 	mutex_unlock(&vq->mutex);
-	unuse_mm(net->dev.mm);
 }
 
 static int peek_head_len(struct sock *sk)
 {
 	struct sk_buff *head;
 	int len = 0;
+	unsigned long flags;
 
-	lock_sock(sk);
+	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
-	if (head)
+	if (likely(head))
 		len = head->len;
-	release_sock(sk);
+	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
 	return len;
 }
 
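The locking swap in peek_head_len() is more than a style fix: lock_sock() is a sleeping, socket-wide lock, heavyweight for a peek that runs once per datagram, while sk_receive_queue.lock is the spinlock the networking core itself takes when enqueueing, possibly from interrupt context, hence the irqsave form. An annotated restatement of the new version (a sketch, assuming only core networking headers):

#include <net/sock.h>
#include <linux/skbuff.h>

/* Length of the first queued skb without dequeueing it. The queue
 * spinlock, not lock_sock(), is what protects sk_receive_queue;
 * interrupts are saved because the enqueue side may take this lock
 * from IRQ/softirq context. */
static int peek_head_len_sketch(struct sock *sk)
{
	struct sk_buff *head;
	unsigned long flags;
	int len = 0;

	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
	head = skb_peek(&sk->sk_receive_queue);	/* NULL if queue empty */
	if (head)
		len = head->len;
	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
	return len;
}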
@@ -227,6 +230,7 @@ static int peek_head_len(struct sock *sk)
  * @iovcount	- returned count of io vectors we fill
  * @log		- vhost log
  * @log_num	- log offset
+ * @quota	- headcount quota, 1 for big buffer
  * returns number of buffer heads allocated, negative on error
  */
 static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -234,7 +238,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
-		       unsigned *log_num)
+		       unsigned *log_num,
+		       unsigned int quota)
 {
 	unsigned int out, in;
 	int seg = 0;
@@ -242,8 +247,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 	unsigned d;
 	int r, nlogs = 0;
 
-	while (datalen > 0) {
-		if (unlikely(seg >= VHOST_NET_MAX_SG)) {
+	while (datalen > 0 && headcount < quota) {
+		if (unlikely(seg >= UIO_MAXIOV)) {
 			r = -ENOBUFS;
 			goto err;
 		}
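The quota parameter is what lets one receive loop serve both negotiated modes: with VIRTIO_NET_F_MRG_RXBUF a packet may span many buffer heads (quota UIO_MAXIOV), without it a packet must fit in a single head (quota 1). A userspace toy of the loop bound, with made-up sizes:

#include <stdio.h>

/* Toy model (not the vhost code): gather fixed-size buffers until the
 * packet fits or the headcount quota is exhausted, mirroring the
 * "datalen > 0 && headcount < quota" condition added above. */
static int get_bufs(int datalen, int bufsize, int quota)
{
	int headcount = 0;

	while (datalen > 0 && headcount < quota) {
		datalen -= bufsize;
		headcount++;
	}
	/* With quota == 1 (no mergeable buffers) an oversized packet leaves
	 * residual datalen; the real code detects this via truncation on
	 * recvmsg and discards the packet. */
	return datalen > 0 ? -1 : headcount;
}

int main(void)
{
	printf("%d\n", get_bufs(4000, 1500, 1024));	/* 3 buffer heads */
	printf("%d\n", get_bufs(4000, 1500, 1));	/* -1: won't fit  */
	return 0;
}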
@@ -282,118 +287,7 @@ err:
 
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
-static void handle_rx_big(struct vhost_net *net)
-{
-	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned out, in, log, s;
-	int head;
-	struct vhost_log *vq_log;
-	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
-		.msg_controllen = 0,
-		.msg_iov = vq->iov,
-		.msg_flags = MSG_DONTWAIT,
-	};
-
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
-	};
-
-	size_t len, total_len = 0;
-	int err;
-	size_t hdr_size;
-	struct socket *sock = rcu_dereference(vq->private_data);
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
-		return;
-
-	use_mm(net->dev.mm);
-	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
-	hdr_size = vq->vhost_hlen;
-
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
-		vq->log : NULL;
-
-	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 vq_log, &log);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
-		/* OK, now we need to know about added descriptors. */
-		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(vq))) {
-				/* They have slipped one in as we were
-				 * doing that: check again. */
-				vhost_disable_notify(vq);
-				continue;
-			}
-			/* Nothing new? Wait for eventfd to tell us
-			 * they refilled. */
-			break;
-		}
-		/* We don't need to be notified again. */
-		if (out) {
-			vq_err(vq, "Unexpected descriptor format for RX: "
-			       "out %d, int %d\n",
-			       out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
-		msg.msg_iovlen = in;
-		len = iov_length(vq->iov, in);
-		/* Sanity check */
-		if (!len) {
-			vq_err(vq, "Unexpected header len for RX: "
-			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
-			break;
-		}
-		err = sock->ops->recvmsg(NULL, sock, &msg,
-					 len, MSG_DONTWAIT | MSG_TRUNC);
-		/* TODO: Check specific error and bomb out unless EAGAIN? */
-		if (err < 0) {
-			vhost_discard_vq_desc(vq, 1);
-			break;
-		}
-		/* TODO: Should check and handle checksum. */
-		if (err > len) {
-			pr_debug("Discarded truncated rx packet: "
-				 " len %d > %zd\n", err, len);
-			vhost_discard_vq_desc(vq, 1);
-			continue;
-		}
-		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-		if (err) {
-			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-			       vq->iov->iov_base, err);
-			break;
-		}
-		len += hdr_size;
-		vhost_add_used_and_signal(&net->dev, vq, head, len);
-		if (unlikely(vq_log))
-			vhost_log_write(vq, vq_log, log, len);
-		total_len += len;
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
-		}
-	}
-
-	mutex_unlock(&vq->mutex);
-	unuse_mm(net->dev.mm);
-}
-
-/* Expects to be always run from workqueue - which acts as
- * read-size critical section for our kind of RCU. */
-static void handle_rx_mergeable(struct vhost_net *net)
+static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
 	unsigned uninitialized_var(in), log;
@@ -406,43 +300,44 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr_mrg_rxbuf hdr = {
 		.hdr.flags = 0,
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t total_len = 0;
-	int err, headcount;
+	int err, headcount, mergeable;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
-	struct socket *sock = rcu_dereference(vq->private_data);
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+	/* TODO: check that we are running from vhost_worker? */
+	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
+	if (!sock)
 		return;
 
-	use_mm(net->dev.mm);
 	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
+	vhost_disable_notify(&net->dev, vq);
 	vhost_hlen = vq->vhost_hlen;
 	sock_hlen = vq->sock_hlen;
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
+	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
-					&in, vq_log, &log);
+					&in, vq_log, &log,
+					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			break;
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
-			if (unlikely(vhost_enable_notify(vq))) {
+			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				/* They have slipped one in as we were
 				 * doing that: check again. */
-				vhost_disable_notify(vq);
+				vhost_disable_notify(&net->dev, vq);
 				continue;
 			}
 			/* Nothing new? Wait for eventfd to tell us
@@ -455,7 +350,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 			move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
 		else
 			/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
-			 * needed because sendmsg can modify msg_iov. */
+			 * needed because recvmsg can modify msg_iov. */
 			copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in);
 		msg.msg_iovlen = in;
 		err = sock->ops->recvmsg(NULL, sock, &msg,
@@ -477,7 +372,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 			break;
 		}
 		/* TODO: Should check and handle checksum. */
-		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
+		if (likely(mergeable) &&
 		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
 				      offsetof(typeof(hdr), num_buffers),
 				      sizeof hdr.num_buffers)) {
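For reference, the header that num_buffers is patched into is the mergeable-buffer variant from <linux/virtio_net.h>; headcount is written at its offset only when VIRTIO_NET_F_MRG_RXBUF was negotiated, since the plain virtio_net_hdr has no such field:

/* From <linux/virtio_net.h>: with VIRTIO_NET_F_MRG_RXBUF each received
 * packet is prefixed by this header, and num_buffers tells the guest how
 * many descriptor chains (headcount above) the packet was spread across. */
struct virtio_net_hdr_mrg_rxbuf {
	struct virtio_net_hdr hdr;
	__u16 num_buffers;	/* filled in from headcount above */
};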
@@ -497,15 +392,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	}
 
 	mutex_unlock(&vq->mutex);
-	unuse_mm(net->dev.mm);
-}
-
-static void handle_rx(struct vhost_net *net)
-{
-	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
-		handle_rx_mergeable(net);
-	else
-		handle_rx_big(net);
 }
 
 static void handle_tx_kick(struct vhost_work *work)
@@ -582,7 +468,10 @@ static void vhost_net_disable_vq(struct vhost_net *n,
 static void vhost_net_enable_vq(struct vhost_net *n,
 				struct vhost_virtqueue *vq)
 {
-	struct socket *sock = vq->private_data;
+	struct socket *sock;
+
+	sock = rcu_dereference_protected(vq->private_data,
+					 lockdep_is_held(&vq->mutex));
 	if (!sock)
 		return;
 	if (vq == n->vqs + VHOST_NET_VQ_TX) {
@@ -598,7 +487,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
 	struct socket *sock;
 
 	mutex_lock(&vq->mutex);
-	sock = vq->private_data;
+	sock = rcu_dereference_protected(vq->private_data,
+					 lockdep_is_held(&vq->mutex));
 	vhost_net_disable_vq(n, vq);
 	rcu_assign_pointer(vq->private_data, NULL);
 	mutex_unlock(&vq->mutex);
@@ -652,6 +542,7 @@ static struct socket *get_raw_socket(int fd)
 	} uaddr;
 	int uaddr_len = sizeof uaddr, r;
 	struct socket *sock = sockfd_lookup(fd, &r);
+
 	if (!sock)
 		return ERR_PTR(-ENOTSOCK);
 
@@ -680,6 +571,7 @@ static struct socket *get_tap_socket(int fd)
 {
 	struct file *file = fget(fd);
 	struct socket *sock;
+
 	if (!file)
 		return ERR_PTR(-EBADF);
 	sock = tun_get_socket(file);
@@ -694,6 +586,7 @@ static struct socket *get_tap_socket(int fd)
 static struct socket *get_socket(int fd)
 {
 	struct socket *sock;
+
 	/* special case to disable backend */
 	if (fd == -1)
 		return NULL;
@@ -736,11 +629,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	}
 
 	/* start polling new socket */
-	oldsock = vq->private_data;
+	oldsock = rcu_dereference_protected(vq->private_data,
+					    lockdep_is_held(&vq->mutex));
 	if (sock != oldsock) {
 		vhost_net_disable_vq(n, vq);
 		rcu_assign_pointer(vq->private_data, sock);
 		vhost_net_enable_vq(n, vq);
 	}
 
 	mutex_unlock(&vq->mutex);
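The backend swap above is the standard RCU publish sequence: read the old pointer under the owning mutex with rcu_dereference_protected() (no read barrier, just the lockdep assertion), then publish the new one with rcu_assign_pointer() so readers never observe a half-initialized backend. In vhost the grace period that makes releasing oldsock safe afterwards is provided by flushing the worker rather than synchronize_rcu(), since the worker is the read side. A minimal sketch with hypothetical names (struct backend, current_be):

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct backend;				/* hypothetical payload */

static DEFINE_MUTEX(owner_mutex);
static struct backend __rcu *current_be;

/* Replace the published backend and return the old one; the caller must
 * still wait out the readers (here, synchronize_rcu() or an equivalent
 * worker flush) before freeing what is returned. */
static struct backend *swap_backend(struct backend *new_be)
{
	struct backend *old;

	mutex_lock(&owner_mutex);
	old = rcu_dereference_protected(current_be,
					lockdep_is_held(&owner_mutex));
	rcu_assign_pointer(current_be, new_be);
	mutex_unlock(&owner_mutex);
	return old;
}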
@@ -765,6 +659,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	struct socket *tx_sock = NULL;
 	struct socket *rx_sock = NULL;
 	long err;
+
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
@@ -826,6 +721,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 	struct vhost_vring_file backend;
 	u64 features;
 	int r;
+
 	switch (ioctl) {
 	case VHOST_NET_SET_BACKEND:
 		if (copy_from_user(&backend, argp, sizeof backend))
@@ -869,6 +765,7 @@ static const struct file_operations vhost_net_fops = {
 	.compat_ioctl = vhost_net_compat_ioctl,
 #endif
 	.open = vhost_net_open,
+	.llseek		= noop_llseek,
 };
 
 static struct miscdevice vhost_net_misc = {
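The .llseek = noop_llseek line is fallout from the BKL-removal work of this era: file_operations that left .llseek NULL silently fell back to default_llseek, and drivers were converted to state their seek semantics explicitly. For an ioctl-only device, noop_llseek lets lseek() succeed without moving any file position (no_llseek, by contrast, fails with -ESPIPE). A self-contained sketch of a hypothetical misc device making the same choice, not the vhost code:

#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/module.h>

/* Hypothetical ioctl-only misc device illustrating the .llseek choice. */
static long example_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	return -ENOIOCTLCMD;	/* no commands in this sketch */
}

static const struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= example_ioctl,
	.llseek		= noop_llseek,	/* seek "succeeds" but moves nothing */
};

static struct miscdevice example_misc = {
	.minor	= MISC_DYNAMIC_MINOR,
	.name	= "example",
	.fops	= &example_fops,
};

static int __init example_init(void)
{
	return misc_register(&example_misc);
}

static void __exit example_exit(void)
{
	misc_deregister(&example_misc);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");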