aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vhost/net.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2011-03-20 17:35:09 -0400
committerDavid S. Miller <davem@davemloft.net>2011-03-20 17:35:09 -0400
commit0e24d34a5b95226cfc335817aefd9cf9744e5659 (patch)
tree6d260dce843bd4b86debb778c6c7a0d9466369ef /drivers/vhost/net.c
parent1a0c83307d8211463df27af7c70465099c4979d3 (diff)
parentde4d768a428d9de943dd6dc82bcd61742955cb6e (diff)
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r--drivers/vhost/net.c159
1 files changed, 26 insertions, 133 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f616cefc95ba..2f7c76a85e53 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
60{ 60{
61 int seg = 0; 61 int seg = 0;
62 size_t size; 62 size_t size;
63
63 while (len && seg < iov_count) { 64 while (len && seg < iov_count) {
64 size = min(from->iov_len, len); 65 size = min(from->iov_len, len);
65 to->iov_base = from->iov_base; 66 to->iov_base = from->iov_base;
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
79{ 80{
80 int seg = 0; 81 int seg = 0;
81 size_t size; 82 size_t size;
83
82 while (len && seg < iovcount) { 84 while (len && seg < iovcount) {
83 size = min(from->iov_len, len); 85 size = min(from->iov_len, len);
84 to->iov_base = from->iov_base; 86 to->iov_base = from->iov_base;
@@ -211,12 +213,13 @@ static int peek_head_len(struct sock *sk)
211{ 213{
212 struct sk_buff *head; 214 struct sk_buff *head;
213 int len = 0; 215 int len = 0;
216 unsigned long flags;
214 217
215 lock_sock(sk); 218 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
216 head = skb_peek(&sk->sk_receive_queue); 219 head = skb_peek(&sk->sk_receive_queue);
217 if (head) 220 if (likely(head))
218 len = head->len; 221 len = head->len;
219 release_sock(sk); 222 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
220 return len; 223 return len;
221} 224}
222 225
@@ -227,6 +230,7 @@ static int peek_head_len(struct sock *sk)
227 * @iovcount - returned count of io vectors we fill 230 * @iovcount - returned count of io vectors we fill
228 * @log - vhost log 231 * @log - vhost log
229 * @log_num - log offset 232 * @log_num - log offset
233 * @quota - headcount quota, 1 for big buffer
230 * returns number of buffer heads allocated, negative on error 234 * returns number of buffer heads allocated, negative on error
231 */ 235 */
232static int get_rx_bufs(struct vhost_virtqueue *vq, 236static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -234,7 +238,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
234 int datalen, 238 int datalen,
235 unsigned *iovcount, 239 unsigned *iovcount,
236 struct vhost_log *log, 240 struct vhost_log *log,
237 unsigned *log_num) 241 unsigned *log_num,
242 unsigned int quota)
238{ 243{
239 unsigned int out, in; 244 unsigned int out, in;
240 int seg = 0; 245 int seg = 0;
@@ -242,7 +247,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
242 unsigned d; 247 unsigned d;
243 int r, nlogs = 0; 248 int r, nlogs = 0;
244 249
245 while (datalen > 0) { 250 while (datalen > 0 && headcount < quota) {
246 if (unlikely(seg >= UIO_MAXIOV)) { 251 if (unlikely(seg >= UIO_MAXIOV)) {
247 r = -ENOBUFS; 252 r = -ENOBUFS;
248 goto err; 253 goto err;
@@ -282,117 +287,7 @@ err:
282 287
283/* Expects to be always run from workqueue - which acts as 288/* Expects to be always run from workqueue - which acts as
284 * read-size critical section for our kind of RCU. */ 289 * read-size critical section for our kind of RCU. */
285static void handle_rx_big(struct vhost_net *net) 290static void handle_rx(struct vhost_net *net)
286{
287 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
288 unsigned out, in, log, s;
289 int head;
290 struct vhost_log *vq_log;
291 struct msghdr msg = {
292 .msg_name = NULL,
293 .msg_namelen = 0,
294 .msg_control = NULL, /* FIXME: get and handle RX aux data. */
295 .msg_controllen = 0,
296 .msg_iov = vq->iov,
297 .msg_flags = MSG_DONTWAIT,
298 };
299
300 struct virtio_net_hdr hdr = {
301 .flags = 0,
302 .gso_type = VIRTIO_NET_HDR_GSO_NONE
303 };
304
305 size_t len, total_len = 0;
306 int err;
307 size_t hdr_size;
308 /* TODO: check that we are running from vhost_worker? */
309 struct socket *sock = rcu_dereference_check(vq->private_data, 1);
310 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
311 return;
312
313 mutex_lock(&vq->mutex);
314 vhost_disable_notify(vq);
315 hdr_size = vq->vhost_hlen;
316
317 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
318 vq->log : NULL;
319
320 for (;;) {
321 head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
322 ARRAY_SIZE(vq->iov),
323 &out, &in,
324 vq_log, &log);
325 /* On error, stop handling until the next kick. */
326 if (unlikely(head < 0))
327 break;
328 /* OK, now we need to know about added descriptors. */
329 if (head == vq->num) {
330 if (unlikely(vhost_enable_notify(vq))) {
331 /* They have slipped one in as we were
332 * doing that: check again. */
333 vhost_disable_notify(vq);
334 continue;
335 }
336 /* Nothing new? Wait for eventfd to tell us
337 * they refilled. */
338 break;
339 }
340 /* We don't need to be notified again. */
341 if (out) {
342 vq_err(vq, "Unexpected descriptor format for RX: "
343 "out %d, int %d\n",
344 out, in);
345 break;
346 }
347 /* Skip header. TODO: support TSO/mergeable rx buffers. */
348 s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
349 msg.msg_iovlen = in;
350 len = iov_length(vq->iov, in);
351 /* Sanity check */
352 if (!len) {
353 vq_err(vq, "Unexpected header len for RX: "
354 "%zd expected %zd\n",
355 iov_length(vq->hdr, s), hdr_size);
356 break;
357 }
358 err = sock->ops->recvmsg(NULL, sock, &msg,
359 len, MSG_DONTWAIT | MSG_TRUNC);
360 /* TODO: Check specific error and bomb out unless EAGAIN? */
361 if (err < 0) {
362 vhost_discard_vq_desc(vq, 1);
363 break;
364 }
365 /* TODO: Should check and handle checksum. */
366 if (err > len) {
367 pr_debug("Discarded truncated rx packet: "
368 " len %d > %zd\n", err, len);
369 vhost_discard_vq_desc(vq, 1);
370 continue;
371 }
372 len = err;
373 err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
374 if (err) {
375 vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
376 vq->iov->iov_base, err);
377 break;
378 }
379 len += hdr_size;
380 vhost_add_used_and_signal(&net->dev, vq, head, len);
381 if (unlikely(vq_log))
382 vhost_log_write(vq, vq_log, log, len);
383 total_len += len;
384 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
385 vhost_poll_queue(&vq->poll);
386 break;
387 }
388 }
389
390 mutex_unlock(&vq->mutex);
391}
392
393/* Expects to be always run from workqueue - which acts as
394 * read-size critical section for our kind of RCU. */
395static void handle_rx_mergeable(struct vhost_net *net)
396{ 291{
397 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; 292 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
398 unsigned uninitialized_var(in), log; 293 unsigned uninitialized_var(in), log;
@@ -405,19 +300,18 @@ static void handle_rx_mergeable(struct vhost_net *net)
405 .msg_iov = vq->iov, 300 .msg_iov = vq->iov,
406 .msg_flags = MSG_DONTWAIT, 301 .msg_flags = MSG_DONTWAIT,
407 }; 302 };
408
409 struct virtio_net_hdr_mrg_rxbuf hdr = { 303 struct virtio_net_hdr_mrg_rxbuf hdr = {
410 .hdr.flags = 0, 304 .hdr.flags = 0,
411 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE 305 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
412 }; 306 };
413
414 size_t total_len = 0; 307 size_t total_len = 0;
415 int err, headcount; 308 int err, headcount, mergeable;
416 size_t vhost_hlen, sock_hlen; 309 size_t vhost_hlen, sock_hlen;
417 size_t vhost_len, sock_len; 310 size_t vhost_len, sock_len;
418 /* TODO: check that we are running from vhost_worker? */ 311 /* TODO: check that we are running from vhost_worker? */
419 struct socket *sock = rcu_dereference_check(vq->private_data, 1); 312 struct socket *sock = rcu_dereference_check(vq->private_data, 1);
420 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) 313
314 if (!sock)
421 return; 315 return;
422 316
423 mutex_lock(&vq->mutex); 317 mutex_lock(&vq->mutex);
@@ -427,12 +321,14 @@ static void handle_rx_mergeable(struct vhost_net *net)
427 321
428 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? 322 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
429 vq->log : NULL; 323 vq->log : NULL;
324 mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
430 325
431 while ((sock_len = peek_head_len(sock->sk))) { 326 while ((sock_len = peek_head_len(sock->sk))) {
432 sock_len += sock_hlen; 327 sock_len += sock_hlen;
433 vhost_len = sock_len + vhost_hlen; 328 vhost_len = sock_len + vhost_hlen;
434 headcount = get_rx_bufs(vq, vq->heads, vhost_len, 329 headcount = get_rx_bufs(vq, vq->heads, vhost_len,
435 &in, vq_log, &log); 330 &in, vq_log, &log,
331 likely(mergeable) ? UIO_MAXIOV : 1);
436 /* On error, stop handling until the next kick. */ 332 /* On error, stop handling until the next kick. */
437 if (unlikely(headcount < 0)) 333 if (unlikely(headcount < 0))
438 break; 334 break;
@@ -476,7 +372,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
476 break; 372 break;
477 } 373 }
478 /* TODO: Should check and handle checksum. */ 374 /* TODO: Should check and handle checksum. */
479 if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && 375 if (likely(mergeable) &&
480 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, 376 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
481 offsetof(typeof(hdr), num_buffers), 377 offsetof(typeof(hdr), num_buffers),
482 sizeof hdr.num_buffers)) { 378 sizeof hdr.num_buffers)) {
@@ -498,14 +394,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
498 mutex_unlock(&vq->mutex); 394 mutex_unlock(&vq->mutex);
499} 395}
500 396
501static void handle_rx(struct vhost_net *net)
502{
503 if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
504 handle_rx_mergeable(net);
505 else
506 handle_rx_big(net);
507}
508
509static void handle_tx_kick(struct vhost_work *work) 397static void handle_tx_kick(struct vhost_work *work)
510{ 398{
511 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 399 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -654,6 +542,7 @@ static struct socket *get_raw_socket(int fd)
654 } uaddr; 542 } uaddr;
655 int uaddr_len = sizeof uaddr, r; 543 int uaddr_len = sizeof uaddr, r;
656 struct socket *sock = sockfd_lookup(fd, &r); 544 struct socket *sock = sockfd_lookup(fd, &r);
545
657 if (!sock) 546 if (!sock)
658 return ERR_PTR(-ENOTSOCK); 547 return ERR_PTR(-ENOTSOCK);
659 548
@@ -682,6 +571,7 @@ static struct socket *get_tap_socket(int fd)
682{ 571{
683 struct file *file = fget(fd); 572 struct file *file = fget(fd);
684 struct socket *sock; 573 struct socket *sock;
574
685 if (!file) 575 if (!file)
686 return ERR_PTR(-EBADF); 576 return ERR_PTR(-EBADF);
687 sock = tun_get_socket(file); 577 sock = tun_get_socket(file);
@@ -696,6 +586,7 @@ static struct socket *get_tap_socket(int fd)
696static struct socket *get_socket(int fd) 586static struct socket *get_socket(int fd)
697{ 587{
698 struct socket *sock; 588 struct socket *sock;
589
699 /* special case to disable backend */ 590 /* special case to disable backend */
700 if (fd == -1) 591 if (fd == -1)
701 return NULL; 592 return NULL;
@@ -741,9 +632,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
741 oldsock = rcu_dereference_protected(vq->private_data, 632 oldsock = rcu_dereference_protected(vq->private_data,
742 lockdep_is_held(&vq->mutex)); 633 lockdep_is_held(&vq->mutex));
743 if (sock != oldsock) { 634 if (sock != oldsock) {
744 vhost_net_disable_vq(n, vq); 635 vhost_net_disable_vq(n, vq);
745 rcu_assign_pointer(vq->private_data, sock); 636 rcu_assign_pointer(vq->private_data, sock);
746 vhost_net_enable_vq(n, vq); 637 vhost_net_enable_vq(n, vq);
747 } 638 }
748 639
749 mutex_unlock(&vq->mutex); 640 mutex_unlock(&vq->mutex);
@@ -768,6 +659,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
768 struct socket *tx_sock = NULL; 659 struct socket *tx_sock = NULL;
769 struct socket *rx_sock = NULL; 660 struct socket *rx_sock = NULL;
770 long err; 661 long err;
662
771 mutex_lock(&n->dev.mutex); 663 mutex_lock(&n->dev.mutex);
772 err = vhost_dev_check_owner(&n->dev); 664 err = vhost_dev_check_owner(&n->dev);
773 if (err) 665 if (err)
@@ -829,6 +721,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
829 struct vhost_vring_file backend; 721 struct vhost_vring_file backend;
830 u64 features; 722 u64 features;
831 int r; 723 int r;
724
832 switch (ioctl) { 725 switch (ioctl) {
833 case VHOST_NET_SET_BACKEND: 726 case VHOST_NET_SET_BACKEND:
834 if (copy_from_user(&backend, argp, sizeof backend)) 727 if (copy_from_user(&backend, argp, sizeof backend))