diff options
author | David S. Miller <davem@davemloft.net> | 2011-03-20 17:35:09 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-03-20 17:35:09 -0400 |
commit | 0e24d34a5b95226cfc335817aefd9cf9744e5659 (patch) | |
tree | 6d260dce843bd4b86debb778c6c7a0d9466369ef | |
parent | 1a0c83307d8211463df27af7c70465099c4979d3 (diff) | |
parent | de4d768a428d9de943dd6dc82bcd61742955cb6e (diff) |
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
-rw-r--r-- | drivers/vhost/net.c | 159 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 55 |
2 files changed, 64 insertions, 150 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f616cefc95ba..2f7c76a85e53 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to, | |||
60 | { | 60 | { |
61 | int seg = 0; | 61 | int seg = 0; |
62 | size_t size; | 62 | size_t size; |
63 | |||
63 | while (len && seg < iov_count) { | 64 | while (len && seg < iov_count) { |
64 | size = min(from->iov_len, len); | 65 | size = min(from->iov_len, len); |
65 | to->iov_base = from->iov_base; | 66 | to->iov_base = from->iov_base; |
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, | |||
79 | { | 80 | { |
80 | int seg = 0; | 81 | int seg = 0; |
81 | size_t size; | 82 | size_t size; |
83 | |||
82 | while (len && seg < iovcount) { | 84 | while (len && seg < iovcount) { |
83 | size = min(from->iov_len, len); | 85 | size = min(from->iov_len, len); |
84 | to->iov_base = from->iov_base; | 86 | to->iov_base = from->iov_base; |
@@ -211,12 +213,13 @@ static int peek_head_len(struct sock *sk) | |||
211 | { | 213 | { |
212 | struct sk_buff *head; | 214 | struct sk_buff *head; |
213 | int len = 0; | 215 | int len = 0; |
216 | unsigned long flags; | ||
214 | 217 | ||
215 | lock_sock(sk); | 218 | spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); |
216 | head = skb_peek(&sk->sk_receive_queue); | 219 | head = skb_peek(&sk->sk_receive_queue); |
217 | if (head) | 220 | if (likely(head)) |
218 | len = head->len; | 221 | len = head->len; |
219 | release_sock(sk); | 222 | spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); |
220 | return len; | 223 | return len; |
221 | } | 224 | } |
222 | 225 | ||
@@ -227,6 +230,7 @@ static int peek_head_len(struct sock *sk) | |||
227 | * @iovcount - returned count of io vectors we fill | 230 | * @iovcount - returned count of io vectors we fill |
228 | * @log - vhost log | 231 | * @log - vhost log |
229 | * @log_num - log offset | 232 | * @log_num - log offset |
233 | * @quota - headcount quota, 1 for big buffer | ||
230 | * returns number of buffer heads allocated, negative on error | 234 | * returns number of buffer heads allocated, negative on error |
231 | */ | 235 | */ |
232 | static int get_rx_bufs(struct vhost_virtqueue *vq, | 236 | static int get_rx_bufs(struct vhost_virtqueue *vq, |
@@ -234,7 +238,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, | |||
234 | int datalen, | 238 | int datalen, |
235 | unsigned *iovcount, | 239 | unsigned *iovcount, |
236 | struct vhost_log *log, | 240 | struct vhost_log *log, |
237 | unsigned *log_num) | 241 | unsigned *log_num, |
242 | unsigned int quota) | ||
238 | { | 243 | { |
239 | unsigned int out, in; | 244 | unsigned int out, in; |
240 | int seg = 0; | 245 | int seg = 0; |
@@ -242,7 +247,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, | |||
242 | unsigned d; | 247 | unsigned d; |
243 | int r, nlogs = 0; | 248 | int r, nlogs = 0; |
244 | 249 | ||
245 | while (datalen > 0) { | 250 | while (datalen > 0 && headcount < quota) { |
246 | if (unlikely(seg >= UIO_MAXIOV)) { | 251 | if (unlikely(seg >= UIO_MAXIOV)) { |
247 | r = -ENOBUFS; | 252 | r = -ENOBUFS; |
248 | goto err; | 253 | goto err; |
@@ -282,117 +287,7 @@ err: | |||
282 | 287 | ||
283 | /* Expects to be always run from workqueue - which acts as | 288 | /* Expects to be always run from workqueue - which acts as |
284 | * read-size critical section for our kind of RCU. */ | 289 | * read-size critical section for our kind of RCU. */ |
285 | static void handle_rx_big(struct vhost_net *net) | 290 | static void handle_rx(struct vhost_net *net) |
286 | { | ||
287 | struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; | ||
288 | unsigned out, in, log, s; | ||
289 | int head; | ||
290 | struct vhost_log *vq_log; | ||
291 | struct msghdr msg = { | ||
292 | .msg_name = NULL, | ||
293 | .msg_namelen = 0, | ||
294 | .msg_control = NULL, /* FIXME: get and handle RX aux data. */ | ||
295 | .msg_controllen = 0, | ||
296 | .msg_iov = vq->iov, | ||
297 | .msg_flags = MSG_DONTWAIT, | ||
298 | }; | ||
299 | |||
300 | struct virtio_net_hdr hdr = { | ||
301 | .flags = 0, | ||
302 | .gso_type = VIRTIO_NET_HDR_GSO_NONE | ||
303 | }; | ||
304 | |||
305 | size_t len, total_len = 0; | ||
306 | int err; | ||
307 | size_t hdr_size; | ||
308 | /* TODO: check that we are running from vhost_worker? */ | ||
309 | struct socket *sock = rcu_dereference_check(vq->private_data, 1); | ||
310 | if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) | ||
311 | return; | ||
312 | |||
313 | mutex_lock(&vq->mutex); | ||
314 | vhost_disable_notify(vq); | ||
315 | hdr_size = vq->vhost_hlen; | ||
316 | |||
317 | vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? | ||
318 | vq->log : NULL; | ||
319 | |||
320 | for (;;) { | ||
321 | head = vhost_get_vq_desc(&net->dev, vq, vq->iov, | ||
322 | ARRAY_SIZE(vq->iov), | ||
323 | &out, &in, | ||
324 | vq_log, &log); | ||
325 | /* On error, stop handling until the next kick. */ | ||
326 | if (unlikely(head < 0)) | ||
327 | break; | ||
328 | /* OK, now we need to know about added descriptors. */ | ||
329 | if (head == vq->num) { | ||
330 | if (unlikely(vhost_enable_notify(vq))) { | ||
331 | /* They have slipped one in as we were | ||
332 | * doing that: check again. */ | ||
333 | vhost_disable_notify(vq); | ||
334 | continue; | ||
335 | } | ||
336 | /* Nothing new? Wait for eventfd to tell us | ||
337 | * they refilled. */ | ||
338 | break; | ||
339 | } | ||
340 | /* We don't need to be notified again. */ | ||
341 | if (out) { | ||
342 | vq_err(vq, "Unexpected descriptor format for RX: " | ||
343 | "out %d, int %d\n", | ||
344 | out, in); | ||
345 | break; | ||
346 | } | ||
347 | /* Skip header. TODO: support TSO/mergeable rx buffers. */ | ||
348 | s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); | ||
349 | msg.msg_iovlen = in; | ||
350 | len = iov_length(vq->iov, in); | ||
351 | /* Sanity check */ | ||
352 | if (!len) { | ||
353 | vq_err(vq, "Unexpected header len for RX: " | ||
354 | "%zd expected %zd\n", | ||
355 | iov_length(vq->hdr, s), hdr_size); | ||
356 | break; | ||
357 | } | ||
358 | err = sock->ops->recvmsg(NULL, sock, &msg, | ||
359 | len, MSG_DONTWAIT | MSG_TRUNC); | ||
360 | /* TODO: Check specific error and bomb out unless EAGAIN? */ | ||
361 | if (err < 0) { | ||
362 | vhost_discard_vq_desc(vq, 1); | ||
363 | break; | ||
364 | } | ||
365 | /* TODO: Should check and handle checksum. */ | ||
366 | if (err > len) { | ||
367 | pr_debug("Discarded truncated rx packet: " | ||
368 | " len %d > %zd\n", err, len); | ||
369 | vhost_discard_vq_desc(vq, 1); | ||
370 | continue; | ||
371 | } | ||
372 | len = err; | ||
373 | err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); | ||
374 | if (err) { | ||
375 | vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", | ||
376 | vq->iov->iov_base, err); | ||
377 | break; | ||
378 | } | ||
379 | len += hdr_size; | ||
380 | vhost_add_used_and_signal(&net->dev, vq, head, len); | ||
381 | if (unlikely(vq_log)) | ||
382 | vhost_log_write(vq, vq_log, log, len); | ||
383 | total_len += len; | ||
384 | if (unlikely(total_len >= VHOST_NET_WEIGHT)) { | ||
385 | vhost_poll_queue(&vq->poll); | ||
386 | break; | ||
387 | } | ||
388 | } | ||
389 | |||
390 | mutex_unlock(&vq->mutex); | ||
391 | } | ||
392 | |||
393 | /* Expects to be always run from workqueue - which acts as | ||
394 | * read-size critical section for our kind of RCU. */ | ||
395 | static void handle_rx_mergeable(struct vhost_net *net) | ||
396 | { | 291 | { |
397 | struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; | 292 | struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; |
398 | unsigned uninitialized_var(in), log; | 293 | unsigned uninitialized_var(in), log; |
@@ -405,19 +300,18 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
405 | .msg_iov = vq->iov, | 300 | .msg_iov = vq->iov, |
406 | .msg_flags = MSG_DONTWAIT, | 301 | .msg_flags = MSG_DONTWAIT, |
407 | }; | 302 | }; |
408 | |||
409 | struct virtio_net_hdr_mrg_rxbuf hdr = { | 303 | struct virtio_net_hdr_mrg_rxbuf hdr = { |
410 | .hdr.flags = 0, | 304 | .hdr.flags = 0, |
411 | .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE | 305 | .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE |
412 | }; | 306 | }; |
413 | |||
414 | size_t total_len = 0; | 307 | size_t total_len = 0; |
415 | int err, headcount; | 308 | int err, headcount, mergeable; |
416 | size_t vhost_hlen, sock_hlen; | 309 | size_t vhost_hlen, sock_hlen; |
417 | size_t vhost_len, sock_len; | 310 | size_t vhost_len, sock_len; |
418 | /* TODO: check that we are running from vhost_worker? */ | 311 | /* TODO: check that we are running from vhost_worker? */ |
419 | struct socket *sock = rcu_dereference_check(vq->private_data, 1); | 312 | struct socket *sock = rcu_dereference_check(vq->private_data, 1); |
420 | if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) | 313 | |
314 | if (!sock) | ||
421 | return; | 315 | return; |
422 | 316 | ||
423 | mutex_lock(&vq->mutex); | 317 | mutex_lock(&vq->mutex); |
@@ -427,12 +321,14 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
427 | 321 | ||
428 | vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? | 322 | vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? |
429 | vq->log : NULL; | 323 | vq->log : NULL; |
324 | mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF); | ||
430 | 325 | ||
431 | while ((sock_len = peek_head_len(sock->sk))) { | 326 | while ((sock_len = peek_head_len(sock->sk))) { |
432 | sock_len += sock_hlen; | 327 | sock_len += sock_hlen; |
433 | vhost_len = sock_len + vhost_hlen; | 328 | vhost_len = sock_len + vhost_hlen; |
434 | headcount = get_rx_bufs(vq, vq->heads, vhost_len, | 329 | headcount = get_rx_bufs(vq, vq->heads, vhost_len, |
435 | &in, vq_log, &log); | 330 | &in, vq_log, &log, |
331 | likely(mergeable) ? UIO_MAXIOV : 1); | ||
436 | /* On error, stop handling until the next kick. */ | 332 | /* On error, stop handling until the next kick. */ |
437 | if (unlikely(headcount < 0)) | 333 | if (unlikely(headcount < 0)) |
438 | break; | 334 | break; |
@@ -476,7 +372,7 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
476 | break; | 372 | break; |
477 | } | 373 | } |
478 | /* TODO: Should check and handle checksum. */ | 374 | /* TODO: Should check and handle checksum. */ |
479 | if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && | 375 | if (likely(mergeable) && |
480 | memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, | 376 | memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, |
481 | offsetof(typeof(hdr), num_buffers), | 377 | offsetof(typeof(hdr), num_buffers), |
482 | sizeof hdr.num_buffers)) { | 378 | sizeof hdr.num_buffers)) { |
@@ -498,14 +394,6 @@ static void handle_rx_mergeable(struct vhost_net *net) | |||
498 | mutex_unlock(&vq->mutex); | 394 | mutex_unlock(&vq->mutex); |
499 | } | 395 | } |
500 | 396 | ||
501 | static void handle_rx(struct vhost_net *net) | ||
502 | { | ||
503 | if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) | ||
504 | handle_rx_mergeable(net); | ||
505 | else | ||
506 | handle_rx_big(net); | ||
507 | } | ||
508 | |||
509 | static void handle_tx_kick(struct vhost_work *work) | 397 | static void handle_tx_kick(struct vhost_work *work) |
510 | { | 398 | { |
511 | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, | 399 | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, |
@@ -654,6 +542,7 @@ static struct socket *get_raw_socket(int fd) | |||
654 | } uaddr; | 542 | } uaddr; |
655 | int uaddr_len = sizeof uaddr, r; | 543 | int uaddr_len = sizeof uaddr, r; |
656 | struct socket *sock = sockfd_lookup(fd, &r); | 544 | struct socket *sock = sockfd_lookup(fd, &r); |
545 | |||
657 | if (!sock) | 546 | if (!sock) |
658 | return ERR_PTR(-ENOTSOCK); | 547 | return ERR_PTR(-ENOTSOCK); |
659 | 548 | ||
@@ -682,6 +571,7 @@ static struct socket *get_tap_socket(int fd) | |||
682 | { | 571 | { |
683 | struct file *file = fget(fd); | 572 | struct file *file = fget(fd); |
684 | struct socket *sock; | 573 | struct socket *sock; |
574 | |||
685 | if (!file) | 575 | if (!file) |
686 | return ERR_PTR(-EBADF); | 576 | return ERR_PTR(-EBADF); |
687 | sock = tun_get_socket(file); | 577 | sock = tun_get_socket(file); |
@@ -696,6 +586,7 @@ static struct socket *get_tap_socket(int fd) | |||
696 | static struct socket *get_socket(int fd) | 586 | static struct socket *get_socket(int fd) |
697 | { | 587 | { |
698 | struct socket *sock; | 588 | struct socket *sock; |
589 | |||
699 | /* special case to disable backend */ | 590 | /* special case to disable backend */ |
700 | if (fd == -1) | 591 | if (fd == -1) |
701 | return NULL; | 592 | return NULL; |
@@ -741,9 +632,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) | |||
741 | oldsock = rcu_dereference_protected(vq->private_data, | 632 | oldsock = rcu_dereference_protected(vq->private_data, |
742 | lockdep_is_held(&vq->mutex)); | 633 | lockdep_is_held(&vq->mutex)); |
743 | if (sock != oldsock) { | 634 | if (sock != oldsock) { |
744 | vhost_net_disable_vq(n, vq); | 635 | vhost_net_disable_vq(n, vq); |
745 | rcu_assign_pointer(vq->private_data, sock); | 636 | rcu_assign_pointer(vq->private_data, sock); |
746 | vhost_net_enable_vq(n, vq); | 637 | vhost_net_enable_vq(n, vq); |
747 | } | 638 | } |
748 | 639 | ||
749 | mutex_unlock(&vq->mutex); | 640 | mutex_unlock(&vq->mutex); |
@@ -768,6 +659,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) | |||
768 | struct socket *tx_sock = NULL; | 659 | struct socket *tx_sock = NULL; |
769 | struct socket *rx_sock = NULL; | 660 | struct socket *rx_sock = NULL; |
770 | long err; | 661 | long err; |
662 | |||
771 | mutex_lock(&n->dev.mutex); | 663 | mutex_lock(&n->dev.mutex); |
772 | err = vhost_dev_check_owner(&n->dev); | 664 | err = vhost_dev_check_owner(&n->dev); |
773 | if (err) | 665 | if (err) |
@@ -829,6 +721,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, | |||
829 | struct vhost_vring_file backend; | 721 | struct vhost_vring_file backend; |
830 | u64 features; | 722 | u64 features; |
831 | int r; | 723 | int r; |
724 | |||
832 | switch (ioctl) { | 725 | switch (ioctl) { |
833 | case VHOST_NET_SET_BACKEND: | 726 | case VHOST_NET_SET_BACKEND: |
834 | if (copy_from_user(&backend, argp, sizeof backend)) | 727 | if (copy_from_user(&backend, argp, sizeof backend)) |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ade0568c07a4..2ab291241635 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
@@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, | |||
41 | poll_table *pt) | 41 | poll_table *pt) |
42 | { | 42 | { |
43 | struct vhost_poll *poll; | 43 | struct vhost_poll *poll; |
44 | poll = container_of(pt, struct vhost_poll, table); | ||
45 | 44 | ||
45 | poll = container_of(pt, struct vhost_poll, table); | ||
46 | poll->wqh = wqh; | 46 | poll->wqh = wqh; |
47 | add_wait_queue(wqh, &poll->wait); | 47 | add_wait_queue(wqh, &poll->wait); |
48 | } | 48 | } |
@@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, | |||
85 | void vhost_poll_start(struct vhost_poll *poll, struct file *file) | 85 | void vhost_poll_start(struct vhost_poll *poll, struct file *file) |
86 | { | 86 | { |
87 | unsigned long mask; | 87 | unsigned long mask; |
88 | |||
88 | mask = file->f_op->poll(file, &poll->table); | 89 | mask = file->f_op->poll(file, &poll->table); |
89 | if (mask) | 90 | if (mask) |
90 | vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); | 91 | vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); |
@@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, | |||
101 | unsigned seq) | 102 | unsigned seq) |
102 | { | 103 | { |
103 | int left; | 104 | int left; |
105 | |||
104 | spin_lock_irq(&dev->work_lock); | 106 | spin_lock_irq(&dev->work_lock); |
105 | left = seq - work->done_seq; | 107 | left = seq - work->done_seq; |
106 | spin_unlock_irq(&dev->work_lock); | 108 | spin_unlock_irq(&dev->work_lock); |
@@ -222,6 +224,7 @@ static int vhost_worker(void *data) | |||
222 | static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) | 224 | static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) |
223 | { | 225 | { |
224 | int i; | 226 | int i; |
227 | |||
225 | for (i = 0; i < dev->nvqs; ++i) { | 228 | for (i = 0; i < dev->nvqs; ++i) { |
226 | dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * | 229 | dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * |
227 | UIO_MAXIOV, GFP_KERNEL); | 230 | UIO_MAXIOV, GFP_KERNEL); |
@@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) | |||
235 | goto err_nomem; | 238 | goto err_nomem; |
236 | } | 239 | } |
237 | return 0; | 240 | return 0; |
241 | |||
238 | err_nomem: | 242 | err_nomem: |
239 | for (; i >= 0; --i) { | 243 | for (; i >= 0; --i) { |
240 | kfree(dev->vqs[i].indirect); | 244 | kfree(dev->vqs[i].indirect); |
@@ -247,6 +251,7 @@ err_nomem: | |||
247 | static void vhost_dev_free_iovecs(struct vhost_dev *dev) | 251 | static void vhost_dev_free_iovecs(struct vhost_dev *dev) |
248 | { | 252 | { |
249 | int i; | 253 | int i; |
254 | |||
250 | for (i = 0; i < dev->nvqs; ++i) { | 255 | for (i = 0; i < dev->nvqs; ++i) { |
251 | kfree(dev->vqs[i].indirect); | 256 | kfree(dev->vqs[i].indirect); |
252 | dev->vqs[i].indirect = NULL; | 257 | dev->vqs[i].indirect = NULL; |
@@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev) | |||
296 | } | 301 | } |
297 | 302 | ||
298 | struct vhost_attach_cgroups_struct { | 303 | struct vhost_attach_cgroups_struct { |
299 | struct vhost_work work; | 304 | struct vhost_work work; |
300 | struct task_struct *owner; | 305 | struct task_struct *owner; |
301 | int ret; | 306 | int ret; |
302 | }; | 307 | }; |
303 | 308 | ||
304 | static void vhost_attach_cgroups_work(struct vhost_work *work) | 309 | static void vhost_attach_cgroups_work(struct vhost_work *work) |
305 | { | 310 | { |
306 | struct vhost_attach_cgroups_struct *s; | 311 | struct vhost_attach_cgroups_struct *s; |
307 | s = container_of(work, struct vhost_attach_cgroups_struct, work); | 312 | |
308 | s->ret = cgroup_attach_task_all(s->owner, current); | 313 | s = container_of(work, struct vhost_attach_cgroups_struct, work); |
314 | s->ret = cgroup_attach_task_all(s->owner, current); | ||
309 | } | 315 | } |
310 | 316 | ||
311 | static int vhost_attach_cgroups(struct vhost_dev *dev) | 317 | static int vhost_attach_cgroups(struct vhost_dev *dev) |
312 | { | 318 | { |
313 | struct vhost_attach_cgroups_struct attach; | 319 | struct vhost_attach_cgroups_struct attach; |
314 | attach.owner = current; | 320 | |
315 | vhost_work_init(&attach.work, vhost_attach_cgroups_work); | 321 | attach.owner = current; |
316 | vhost_work_queue(dev, &attach.work); | 322 | vhost_work_init(&attach.work, vhost_attach_cgroups_work); |
317 | vhost_work_flush(dev, &attach.work); | 323 | vhost_work_queue(dev, &attach.work); |
318 | return attach.ret; | 324 | vhost_work_flush(dev, &attach.work); |
325 | return attach.ret; | ||
319 | } | 326 | } |
320 | 327 | ||
321 | /* Caller should have device mutex */ | 328 | /* Caller should have device mutex */ |
@@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev) | |||
323 | { | 330 | { |
324 | struct task_struct *worker; | 331 | struct task_struct *worker; |
325 | int err; | 332 | int err; |
333 | |||
326 | /* Is there an owner already? */ | 334 | /* Is there an owner already? */ |
327 | if (dev->mm) { | 335 | if (dev->mm) { |
328 | err = -EBUSY; | 336 | err = -EBUSY; |
329 | goto err_mm; | 337 | goto err_mm; |
330 | } | 338 | } |
339 | |||
331 | /* No owner, become one */ | 340 | /* No owner, become one */ |
332 | dev->mm = get_task_mm(current); | 341 | dev->mm = get_task_mm(current); |
333 | worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); | 342 | worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); |
@@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) | |||
380 | void vhost_dev_cleanup(struct vhost_dev *dev) | 389 | void vhost_dev_cleanup(struct vhost_dev *dev) |
381 | { | 390 | { |
382 | int i; | 391 | int i; |
392 | |||
383 | for (i = 0; i < dev->nvqs; ++i) { | 393 | for (i = 0; i < dev->nvqs; ++i) { |
384 | if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { | 394 | if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { |
385 | vhost_poll_stop(&dev->vqs[i].poll); | 395 | vhost_poll_stop(&dev->vqs[i].poll); |
@@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) | |||
421 | static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) | 431 | static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) |
422 | { | 432 | { |
423 | u64 a = addr / VHOST_PAGE_SIZE / 8; | 433 | u64 a = addr / VHOST_PAGE_SIZE / 8; |
434 | |||
424 | /* Make sure 64 bit math will not overflow. */ | 435 | /* Make sure 64 bit math will not overflow. */ |
425 | if (a > ULONG_MAX - (unsigned long)log_base || | 436 | if (a > ULONG_MAX - (unsigned long)log_base || |
426 | a + (unsigned long)log_base > ULONG_MAX) | 437 | a + (unsigned long)log_base > ULONG_MAX) |
@@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, | |||
461 | int log_all) | 472 | int log_all) |
462 | { | 473 | { |
463 | int i; | 474 | int i; |
475 | |||
464 | for (i = 0; i < d->nvqs; ++i) { | 476 | for (i = 0; i < d->nvqs; ++i) { |
465 | int ok; | 477 | int ok; |
466 | mutex_lock(&d->vqs[i].mutex); | 478 | mutex_lock(&d->vqs[i].mutex); |
@@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) | |||
527 | { | 539 | { |
528 | struct vhost_memory mem, *newmem, *oldmem; | 540 | struct vhost_memory mem, *newmem, *oldmem; |
529 | unsigned long size = offsetof(struct vhost_memory, regions); | 541 | unsigned long size = offsetof(struct vhost_memory, regions); |
542 | |||
530 | if (copy_from_user(&mem, m, size)) | 543 | if (copy_from_user(&mem, m, size)) |
531 | return -EFAULT; | 544 | return -EFAULT; |
532 | if (mem.padding) | 545 | if (mem.padding) |
@@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) | |||
544 | return -EFAULT; | 557 | return -EFAULT; |
545 | } | 558 | } |
546 | 559 | ||
547 | if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) { | 560 | if (!memory_access_ok(d, newmem, |
561 | vhost_has_feature(d, VHOST_F_LOG_ALL))) { | ||
548 | kfree(newmem); | 562 | kfree(newmem); |
549 | return -EFAULT; | 563 | return -EFAULT; |
550 | } | 564 | } |
@@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq, | |||
560 | struct vring_used __user *used) | 574 | struct vring_used __user *used) |
561 | { | 575 | { |
562 | int r = put_user(vq->used_flags, &used->flags); | 576 | int r = put_user(vq->used_flags, &used->flags); |
577 | |||
563 | if (r) | 578 | if (r) |
564 | return r; | 579 | return r; |
565 | return get_user(vq->last_used_idx, &used->idx); | 580 | return get_user(vq->last_used_idx, &used->idx); |
@@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem, | |||
849 | { | 864 | { |
850 | struct vhost_memory_region *reg; | 865 | struct vhost_memory_region *reg; |
851 | int i; | 866 | int i; |
867 | |||
852 | /* linear search is not brilliant, but we really have on the order of 6 | 868 | /* linear search is not brilliant, but we really have on the order of 6 |
853 | * regions in practice */ | 869 | * regions in practice */ |
854 | for (i = 0; i < mem->nregions; ++i) { | 870 | for (i = 0; i < mem->nregions; ++i) { |
@@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr) | |||
871 | void *base; | 887 | void *base; |
872 | int bit = nr + (log % PAGE_SIZE) * 8; | 888 | int bit = nr + (log % PAGE_SIZE) * 8; |
873 | int r; | 889 | int r; |
890 | |||
874 | r = get_user_pages_fast(log, 1, 1, &page); | 891 | r = get_user_pages_fast(log, 1, 1, &page); |
875 | if (r < 0) | 892 | if (r < 0) |
876 | return r; | 893 | return r; |
@@ -888,6 +905,7 @@ static int log_write(void __user *log_base, | |||
888 | { | 905 | { |
889 | u64 write_page = write_address / VHOST_PAGE_SIZE; | 906 | u64 write_page = write_address / VHOST_PAGE_SIZE; |
890 | int r; | 907 | int r; |
908 | |||
891 | if (!write_length) | 909 | if (!write_length) |
892 | return 0; | 910 | return 0; |
893 | write_length += write_address % VHOST_PAGE_SIZE; | 911 | write_length += write_address % VHOST_PAGE_SIZE; |
@@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, | |||
1037 | i, count); | 1055 | i, count); |
1038 | return -EINVAL; | 1056 | return -EINVAL; |
1039 | } | 1057 | } |
1040 | if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect, | 1058 | if (unlikely(memcpy_fromiovec((unsigned char *)&desc, |
1041 | sizeof desc))) { | 1059 | vq->indirect, sizeof desc))) { |
1042 | vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", | 1060 | vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", |
1043 | i, (size_t)indirect->addr + i * sizeof desc); | 1061 | i, (size_t)indirect->addr + i * sizeof desc); |
1044 | return -EINVAL; | 1062 | return -EINVAL; |
@@ -1153,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, | |||
1153 | i, vq->num, head); | 1171 | i, vq->num, head); |
1154 | return -EINVAL; | 1172 | return -EINVAL; |
1155 | } | 1173 | } |
1156 | ret = copy_from_user(&desc, vq->desc + i, sizeof desc); | 1174 | ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); |
1157 | if (unlikely(ret)) { | 1175 | if (unlikely(ret)) { |
1158 | vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", | 1176 | vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", |
1159 | i, vq->desc + i); | 1177 | i, vq->desc + i); |
@@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, | |||
1317 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) | 1335 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) |
1318 | { | 1336 | { |
1319 | __u16 flags; | 1337 | __u16 flags; |
1338 | |||
1320 | /* Flush out used index updates. This is paired | 1339 | /* Flush out used index updates. This is paired |
1321 | * with the barrier that the Guest executes when enabling | 1340 | * with the barrier that the Guest executes when enabling |
1322 | * interrupts. */ | 1341 | * interrupts. */ |
@@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) | |||
1361 | { | 1380 | { |
1362 | u16 avail_idx; | 1381 | u16 avail_idx; |
1363 | int r; | 1382 | int r; |
1383 | |||
1364 | if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) | 1384 | if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) |
1365 | return false; | 1385 | return false; |
1366 | vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; | 1386 | vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; |
@@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) | |||
1387 | void vhost_disable_notify(struct vhost_virtqueue *vq) | 1407 | void vhost_disable_notify(struct vhost_virtqueue *vq) |
1388 | { | 1408 | { |
1389 | int r; | 1409 | int r; |
1410 | |||
1390 | if (vq->used_flags & VRING_USED_F_NO_NOTIFY) | 1411 | if (vq->used_flags & VRING_USED_F_NO_NOTIFY) |
1391 | return; | 1412 | return; |
1392 | vq->used_flags |= VRING_USED_F_NO_NOTIFY; | 1413 | vq->used_flags |= VRING_USED_F_NO_NOTIFY; |