diff options
author | David S. Miller <davem@davemloft.net> | 2011-03-20 17:35:09 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-03-20 17:35:09 -0400 |
commit | 0e24d34a5b95226cfc335817aefd9cf9744e5659 (patch) | |
tree | 6d260dce843bd4b86debb778c6c7a0d9466369ef /drivers/vhost/net.c | |
parent | 1a0c83307d8211463df27af7c70465099c4979d3 (diff) | |
parent | de4d768a428d9de943dd6dc82bcd61742955cb6e (diff) |
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r-- | drivers/vhost/net.c | 159 |
1 file changed, 26 insertions, 133 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f616cefc95ba..2f7c76a85e53 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to, | |||
60 | { | 60 | { |
61 | int seg = 0; | 61 | int seg = 0; |
62 | size_t size; | 62 | size_t size; |
63 | |||
63 | while (len && seg < iov_count) { | 64 | while (len && seg < iov_count) { |
64 | size = min(from->iov_len, len); | 65 | size = min(from->iov_len, len); |
65 | to->iov_base = from->iov_base; | 66 | to->iov_base = from->iov_base; |
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, | |||
79 | { | 80 | { |
80 | int seg = 0; | 81 | int seg = 0; |
81 | size_t size; | 82 | size_t size; |
83 | |||
82 | while (len && seg < iovcount) { | 84 | while (len && seg < iovcount) { |
83 | size = min(from->iov_len, len); | 85 | size = min(from->iov_len, len); |
84 | to->iov_base = from->iov_base; | 86 | to->iov_base = from->iov_base; |
@@ -211,12 +213,13 @@ static int peek_head_len(struct sock *sk) | |||
211 | { | 213 | { |
212 | struct sk_buff *head; | 214 | struct sk_buff *head; |
213 | int len = 0; | 215 | int len = 0; |
216 | unsigned long flags; | ||
214 | 217 | ||
215 | lock_sock(sk); | 218 | spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); |
216 | head = skb_peek(&sk->sk_receive_queue); | 219 | head = skb_peek(&sk->sk_receive_queue); |
217 | if (head) | 220 | if (likely(head)) |
218 | len = head->len; | 221 | len = head->len; |
219 | release_sock(sk); | 222 | spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); |
220 | return len; | 223 | return len; |
221 | } | 224 | } |
222 | 225 | ||
@@ -227,6 +230,7 @@ static int peek_head_len(struct sock *sk) | |||
227 | * @iovcount - returned count of io vectors we fill | 230 | * @iovcount - returned count of io vectors we fill |
228 | * @log - vhost log | 231 | * @log - vhost log |
229 | * @log_num - log offset | 232 | * @log_num - log offset |
233 | * @quota - headcount quota, 1 for big buffer | ||
230 | * returns number of buffer heads allocated, negative on error | 234 | * returns number of buffer heads allocated, negative on error |
231 | */ | 235 | */ |
232 | static int get_rx_bufs(struct vhost_virtqueue *vq, | 236 | static int get_rx_bufs(struct vhost_virtqueue *vq, |
@@ -234,7 +238,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, | |||
234 | int datalen, | 238 | int datalen, |
235 | unsigned *iovcount, | 239 | unsigned *iovcount, |
236 | struct vhost_log *log, | 240 | struct vhost_log *log, |
237 | unsigned *log_num) | 241 | unsigned *log_num, |
242 | unsigned int quota) | ||
238 | { | 243 | { |
239 | unsigned int out, in; | 244 | unsigned int out, in; |
240 | int seg = 0; | 245 | int seg = 0; |
@@ -242,7 +247,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, | |||
242 | unsigned d; | 247 | unsigned d; |
243 | int r, nlogs = 0; | 248 | int r, nlogs = 0; |
244 | 249 | ||
245 | while (datalen > 0) { | 250 | while (datalen > 0 && headcount < quota) { |
246 | if (unlikely(seg >= UIO_MAXIOV)) { | 251 | if (unlikely(seg >= UIO_MAXIOV)) { |
247 | r = -ENOBUFS; | 252 | r = -ENOBUFS; |
248 | goto err; | 253 | goto err; |
@@ -282,117 +287,7 @@ err: | |||
282 | 287 | ||
283 | /* Expects to be always run from workqueue - which acts as | 288 | /* Expects to be always run from workqueue - which acts as |
284 | * read-side critical section for our kind of RCU. */ | 289 | * read-side critical section for our kind of RCU. */ |
285 | static void handle_rx_big(struct vhost_net *net) | 290 | static void handle_rx(struct vhost_net *net) |
286 | { | ||
287 | struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; | ||
288 | unsigned out, in, log, s; | ||
289 | int head; | ||
290 | struct vhost_log *vq_log; | ||
291 | struct msghdr msg = { | ||
292 | .msg_name = NULL, | ||
293 | .msg_namelen = 0, | ||
294 | .msg_control = NULL, /* FIXME: get and handle RX aux data. */ | ||
295 | .msg_controllen = 0, | ||
296 | .msg_iov = vq->iov, | ||
297 | .msg_flags = MSG_DONTWAIT, | ||
298 | }; | ||
299 | |||
300 | struct virtio_net_hdr hdr = { | ||
301 | .flags = 0, | ||
302 | .gso_type = VIRTIO_NET_HDR_GSO_NONE | ||
303 | }; | ||
304 | |||
305 | size_t len, total_len = 0; | ||
306 | int err; | ||
307 | size_t hdr_size; | ||
308 | /* TODO: check that we are running from vhost_worker? */ | ||
309 | struct socket *sock = rcu_dereference_check(vq->private_data, 1); | ||
310 | if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) | ||
311 | return; | ||
312 | |||
313 | mutex_lock(&vq->mutex); | ||
314 | vhost_disable_notify(vq); | ||
315 | hdr_size = vq->vhost_hlen; | ||
316 | |||
317 | vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? | ||
318 | vq->log : NULL; | ||
319 | |||
320 | for (;;) { | ||
321 | head = vhost_get_vq_desc(&net->dev, vq, vq->iov, | ||
322 | ARRAY_SIZE(vq->iov), | ||
323 | &out, &in, | ||
324 | vq_log, &log); | ||
325 | /* On error, stop handling until the next kick. */ | ||
326 | if (unlikely(head < 0)) | ||
327 | break; | ||
328 | /* OK, now we need to know about added descriptors. */ | ||
329 | if (head == vq->num) { | ||
330 | if (unlikely(vhost_enable_notify(vq))) { | ||
331 | /* They have slipped one in as we were | ||
332 | * doing that: check again. */ | ||
333 | vhost_disable_notify(vq); | ||
334 | continue; | ||
335 | } | ||
336 | /* Nothing new? Wait for eventfd to tell us | ||
337 | * they refilled. */ | ||
338 | break; | ||
339 | } | ||
340 | /* We don't need to be notified again. */ | ||
341 | if (out) { | ||
342 | vq_err(vq, "Unexpected descriptor format for RX: " | ||
343 | "out %d, int %d\n", | ||
344 | out, in); | ||
345 | break; | ||
346 | } | ||
347 | /* Skip header. TODO: support TSO/mergeable rx buffers. */ | ||
348 | s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); | ||
349 | msg.msg_iovlen = in; | ||
350 | len = iov_length(vq->iov, in); | ||
351 | /* Sanity check */ | ||
352 | if (!len) { | ||
353 | vq_err(vq, "Unexpected header len for RX: " | ||
354 | "%zd expected %zd\n", | ||
355 | iov_length(vq->hdr, s), hdr_size); | ||
356 | break; | ||
357 | } | ||
358 | err = sock->ops->recvmsg(NULL, sock, &msg, | ||
359 | len, MSG_DONTWAIT | MSG_TRUNC); | ||
360 | /* TODO: Check specific error and bomb out unless EAGAIN? */ | ||
361 | if (err < 0) { | ||
362 | vhost_discard_vq_desc(vq, 1); | ||
363 | break; | ||
364 | } | ||
365 | /* TODO: Should check and handle checksum. */ | ||
366 | if (err > len) { | ||
367 | pr_debug("Discarded truncated rx packet: " | ||
368 | " len %d > %zd\n", err, len); | ||
369 | vhost_discard_vq_desc(vq, 1); | ||
370 | continue; | ||
371 | } | ||
372 | len = err; | ||
373 | err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); | ||
374 | if (err) { | ||
375 | vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", | ||
376 | vq->iov->iov_base, err); | ||
377 | break; | ||
378 | } | ||
379 | len += hdr_size; | ||
380 | vhost_add_used_and_signal(&net->dev, vq, head, len); | ||
381 | if (unlikely(vq_log)) | ||
382 | vhost_log_write(vq, vq_log, log, len); | ||
383 | total_len += len; | ||
384 | if (unlikely(total_len >= VHOST_NET_WEIGHT)) { | ||
385 | vhost_poll_queue(&vq->poll); | ||
386 | break; | ||
387 | } | ||
388 | } | ||
389 | |||
390 | mutex_unlock(&vq->mutex); | ||
391 | } | ||
392 | |||
393 | /* Expects to be always run from workqueue - which acts as | ||
394 | * read-side critical section for our kind of RCU. */ | ||
395 | static void handle_rx_mergeable(struct vhost_net *net) | ||
396 | { | 291 | { |
397 | struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; | 292 | struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; |
398 | unsigned uninitialized_var(in), log; | 293 | unsigned uninitialized_var(in), log; |
@@ -405,19 +300,18 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
405 | .msg_iov = vq->iov, | 300 | .msg_iov = vq->iov, |
406 | .msg_flags = MSG_DONTWAIT, | 301 | .msg_flags = MSG_DONTWAIT, |
407 | }; | 302 | }; |
408 | |||
409 | struct virtio_net_hdr_mrg_rxbuf hdr = { | 303 | struct virtio_net_hdr_mrg_rxbuf hdr = { |
410 | .hdr.flags = 0, | 304 | .hdr.flags = 0, |
411 | .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE | 305 | .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE |
412 | }; | 306 | }; |
413 | |||
414 | size_t total_len = 0; | 307 | size_t total_len = 0; |
415 | int err, headcount; | 308 | int err, headcount, mergeable; |
416 | size_t vhost_hlen, sock_hlen; | 309 | size_t vhost_hlen, sock_hlen; |
417 | size_t vhost_len, sock_len; | 310 | size_t vhost_len, sock_len; |
418 | /* TODO: check that we are running from vhost_worker? */ | 311 | /* TODO: check that we are running from vhost_worker? */ |
419 | struct socket *sock = rcu_dereference_check(vq->private_data, 1); | 312 | struct socket *sock = rcu_dereference_check(vq->private_data, 1); |
420 | if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) | 313 | |
314 | if (!sock) | ||
421 | return; | 315 | return; |
422 | 316 | ||
423 | mutex_lock(&vq->mutex); | 317 | mutex_lock(&vq->mutex); |
@@ -427,12 +321,14 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
427 | 321 | ||
428 | vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? | 322 | vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? |
429 | vq->log : NULL; | 323 | vq->log : NULL; |
324 | mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF); | ||
430 | 325 | ||
431 | while ((sock_len = peek_head_len(sock->sk))) { | 326 | while ((sock_len = peek_head_len(sock->sk))) { |
432 | sock_len += sock_hlen; | 327 | sock_len += sock_hlen; |
433 | vhost_len = sock_len + vhost_hlen; | 328 | vhost_len = sock_len + vhost_hlen; |
434 | headcount = get_rx_bufs(vq, vq->heads, vhost_len, | 329 | headcount = get_rx_bufs(vq, vq->heads, vhost_len, |
435 | &in, vq_log, &log); | 330 | &in, vq_log, &log, |
331 | likely(mergeable) ? UIO_MAXIOV : 1); | ||
436 | /* On error, stop handling until the next kick. */ | 332 | /* On error, stop handling until the next kick. */ |
437 | if (unlikely(headcount < 0)) | 333 | if (unlikely(headcount < 0)) |
438 | break; | 334 | break; |
@@ -476,7 +372,7 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
476 | break; | 372 | break; |
477 | } | 373 | } |
478 | /* TODO: Should check and handle checksum. */ | 374 | /* TODO: Should check and handle checksum. */ |
479 | if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && | 375 | if (likely(mergeable) && |
480 | memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, | 376 | memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, |
481 | offsetof(typeof(hdr), num_buffers), | 377 | offsetof(typeof(hdr), num_buffers), |
482 | sizeof hdr.num_buffers)) { | 378 | sizeof hdr.num_buffers)) { |
@@ -498,14 +394,6 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
498 | mutex_unlock(&vq->mutex); | 394 | mutex_unlock(&vq->mutex); |
499 | } | 395 | } |
500 | 396 | ||
501 | static void handle_rx(struct vhost_net *net) | ||
502 | { | ||
503 | if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) | ||
504 | handle_rx_mergeable(net); | ||
505 | else | ||
506 | handle_rx_big(net); | ||
507 | } | ||
508 | |||
509 | static void handle_tx_kick(struct vhost_work *work) | 397 | static void handle_tx_kick(struct vhost_work *work) |
510 | { | 398 | { |
511 | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, | 399 | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, |
@@ -654,6 +542,7 @@ static struct socket *get_raw_socket(int fd) | |||
654 | } uaddr; | 542 | } uaddr; |
655 | int uaddr_len = sizeof uaddr, r; | 543 | int uaddr_len = sizeof uaddr, r; |
656 | struct socket *sock = sockfd_lookup(fd, &r); | 544 | struct socket *sock = sockfd_lookup(fd, &r); |
545 | |||
657 | if (!sock) | 546 | if (!sock) |
658 | return ERR_PTR(-ENOTSOCK); | 547 | return ERR_PTR(-ENOTSOCK); |
659 | 548 | ||
@@ -682,6 +571,7 @@ static struct socket *get_tap_socket(int fd) | |||
682 | { | 571 | { |
683 | struct file *file = fget(fd); | 572 | struct file *file = fget(fd); |
684 | struct socket *sock; | 573 | struct socket *sock; |
574 | |||
685 | if (!file) | 575 | if (!file) |
686 | return ERR_PTR(-EBADF); | 576 | return ERR_PTR(-EBADF); |
687 | sock = tun_get_socket(file); | 577 | sock = tun_get_socket(file); |
@@ -696,6 +586,7 @@ static struct socket *get_tap_socket(int fd) | |||
696 | static struct socket *get_socket(int fd) | 586 | static struct socket *get_socket(int fd) |
697 | { | 587 | { |
698 | struct socket *sock; | 588 | struct socket *sock; |
589 | |||
699 | /* special case to disable backend */ | 590 | /* special case to disable backend */ |
700 | if (fd == -1) | 591 | if (fd == -1) |
701 | return NULL; | 592 | return NULL; |
@@ -741,9 +632,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) | |||
741 | oldsock = rcu_dereference_protected(vq->private_data, | 632 | oldsock = rcu_dereference_protected(vq->private_data, |
742 | lockdep_is_held(&vq->mutex)); | 633 | lockdep_is_held(&vq->mutex)); |
743 | if (sock != oldsock) { | 634 | if (sock != oldsock) { |
744 | vhost_net_disable_vq(n, vq); | 635 | vhost_net_disable_vq(n, vq); |
745 | rcu_assign_pointer(vq->private_data, sock); | 636 | rcu_assign_pointer(vq->private_data, sock); |
746 | vhost_net_enable_vq(n, vq); | 637 | vhost_net_enable_vq(n, vq); |
747 | } | 638 | } |
748 | 639 | ||
749 | mutex_unlock(&vq->mutex); | 640 | mutex_unlock(&vq->mutex); |
@@ -768,6 +659,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) | |||
768 | struct socket *tx_sock = NULL; | 659 | struct socket *tx_sock = NULL; |
769 | struct socket *rx_sock = NULL; | 660 | struct socket *rx_sock = NULL; |
770 | long err; | 661 | long err; |
662 | |||
771 | mutex_lock(&n->dev.mutex); | 663 | mutex_lock(&n->dev.mutex); |
772 | err = vhost_dev_check_owner(&n->dev); | 664 | err = vhost_dev_check_owner(&n->dev); |
773 | if (err) | 665 | if (err) |
@@ -829,6 +721,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, | |||
829 | struct vhost_vring_file backend; | 721 | struct vhost_vring_file backend; |
830 | u64 features; | 722 | u64 features; |
831 | int r; | 723 | int r; |
724 | |||
832 | switch (ioctl) { | 725 | switch (ioctl) { |
833 | case VHOST_NET_SET_BACKEND: | 726 | case VHOST_NET_SET_BACKEND: |
834 | if (copy_from_user(&backend, argp, sizeof backend)) | 727 | if (copy_from_user(&backend, argp, sizeof backend)) |