aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2011-03-20 17:35:09 -0400
committerDavid S. Miller <davem@davemloft.net>2011-03-20 17:35:09 -0400
commit0e24d34a5b95226cfc335817aefd9cf9744e5659 (patch)
tree6d260dce843bd4b86debb778c6c7a0d9466369ef
parent1a0c83307d8211463df27af7c70465099c4979d3 (diff)
parentde4d768a428d9de943dd6dc82bcd61742955cb6e (diff)
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
-rw-r--r--drivers/vhost/net.c159
-rw-r--r--drivers/vhost/vhost.c55
2 files changed, 64 insertions, 150 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f616cefc95ba..2f7c76a85e53 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
60{ 60{
61 int seg = 0; 61 int seg = 0;
62 size_t size; 62 size_t size;
63
63 while (len && seg < iov_count) { 64 while (len && seg < iov_count) {
64 size = min(from->iov_len, len); 65 size = min(from->iov_len, len);
65 to->iov_base = from->iov_base; 66 to->iov_base = from->iov_base;
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
79{ 80{
80 int seg = 0; 81 int seg = 0;
81 size_t size; 82 size_t size;
83
82 while (len && seg < iovcount) { 84 while (len && seg < iovcount) {
83 size = min(from->iov_len, len); 85 size = min(from->iov_len, len);
84 to->iov_base = from->iov_base; 86 to->iov_base = from->iov_base;
@@ -211,12 +213,13 @@ static int peek_head_len(struct sock *sk)
211{ 213{
212 struct sk_buff *head; 214 struct sk_buff *head;
213 int len = 0; 215 int len = 0;
216 unsigned long flags;
214 217
215 lock_sock(sk); 218 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
216 head = skb_peek(&sk->sk_receive_queue); 219 head = skb_peek(&sk->sk_receive_queue);
217 if (head) 220 if (likely(head))
218 len = head->len; 221 len = head->len;
219 release_sock(sk); 222 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
220 return len; 223 return len;
221} 224}
222 225
@@ -227,6 +230,7 @@ static int peek_head_len(struct sock *sk)
227 * @iovcount - returned count of io vectors we fill 230 * @iovcount - returned count of io vectors we fill
228 * @log - vhost log 231 * @log - vhost log
229 * @log_num - log offset 232 * @log_num - log offset
233 * @quota - headcount quota, 1 for big buffer
230 * returns number of buffer heads allocated, negative on error 234 * returns number of buffer heads allocated, negative on error
231 */ 235 */
232static int get_rx_bufs(struct vhost_virtqueue *vq, 236static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -234,7 +238,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
234 int datalen, 238 int datalen,
235 unsigned *iovcount, 239 unsigned *iovcount,
236 struct vhost_log *log, 240 struct vhost_log *log,
237 unsigned *log_num) 241 unsigned *log_num,
242 unsigned int quota)
238{ 243{
239 unsigned int out, in; 244 unsigned int out, in;
240 int seg = 0; 245 int seg = 0;
@@ -242,7 +247,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
242 unsigned d; 247 unsigned d;
243 int r, nlogs = 0; 248 int r, nlogs = 0;
244 249
245 while (datalen > 0) { 250 while (datalen > 0 && headcount < quota) {
246 if (unlikely(seg >= UIO_MAXIOV)) { 251 if (unlikely(seg >= UIO_MAXIOV)) {
247 r = -ENOBUFS; 252 r = -ENOBUFS;
248 goto err; 253 goto err;
@@ -282,117 +287,7 @@ err:
282 287
283/* Expects to be always run from workqueue - which acts as 288/* Expects to be always run from workqueue - which acts as
284 * read-side critical section for our kind of RCU. */ 289 * read-side critical section for our kind of RCU. */
285static void handle_rx_big(struct vhost_net *net) 290static void handle_rx(struct vhost_net *net)
286{
287 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
288 unsigned out, in, log, s;
289 int head;
290 struct vhost_log *vq_log;
291 struct msghdr msg = {
292 .msg_name = NULL,
293 .msg_namelen = 0,
294 .msg_control = NULL, /* FIXME: get and handle RX aux data. */
295 .msg_controllen = 0,
296 .msg_iov = vq->iov,
297 .msg_flags = MSG_DONTWAIT,
298 };
299
300 struct virtio_net_hdr hdr = {
301 .flags = 0,
302 .gso_type = VIRTIO_NET_HDR_GSO_NONE
303 };
304
305 size_t len, total_len = 0;
306 int err;
307 size_t hdr_size;
308 /* TODO: check that we are running from vhost_worker? */
309 struct socket *sock = rcu_dereference_check(vq->private_data, 1);
310 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
311 return;
312
313 mutex_lock(&vq->mutex);
314 vhost_disable_notify(vq);
315 hdr_size = vq->vhost_hlen;
316
317 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
318 vq->log : NULL;
319
320 for (;;) {
321 head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
322 ARRAY_SIZE(vq->iov),
323 &out, &in,
324 vq_log, &log);
325 /* On error, stop handling until the next kick. */
326 if (unlikely(head < 0))
327 break;
328 /* OK, now we need to know about added descriptors. */
329 if (head == vq->num) {
330 if (unlikely(vhost_enable_notify(vq))) {
331 /* They have slipped one in as we were
332 * doing that: check again. */
333 vhost_disable_notify(vq);
334 continue;
335 }
336 /* Nothing new? Wait for eventfd to tell us
337 * they refilled. */
338 break;
339 }
340 /* We don't need to be notified again. */
341 if (out) {
342 vq_err(vq, "Unexpected descriptor format for RX: "
343 "out %d, int %d\n",
344 out, in);
345 break;
346 }
347 /* Skip header. TODO: support TSO/mergeable rx buffers. */
348 s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
349 msg.msg_iovlen = in;
350 len = iov_length(vq->iov, in);
351 /* Sanity check */
352 if (!len) {
353 vq_err(vq, "Unexpected header len for RX: "
354 "%zd expected %zd\n",
355 iov_length(vq->hdr, s), hdr_size);
356 break;
357 }
358 err = sock->ops->recvmsg(NULL, sock, &msg,
359 len, MSG_DONTWAIT | MSG_TRUNC);
360 /* TODO: Check specific error and bomb out unless EAGAIN? */
361 if (err < 0) {
362 vhost_discard_vq_desc(vq, 1);
363 break;
364 }
365 /* TODO: Should check and handle checksum. */
366 if (err > len) {
367 pr_debug("Discarded truncated rx packet: "
368 " len %d > %zd\n", err, len);
369 vhost_discard_vq_desc(vq, 1);
370 continue;
371 }
372 len = err;
373 err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
374 if (err) {
375 vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
376 vq->iov->iov_base, err);
377 break;
378 }
379 len += hdr_size;
380 vhost_add_used_and_signal(&net->dev, vq, head, len);
381 if (unlikely(vq_log))
382 vhost_log_write(vq, vq_log, log, len);
383 total_len += len;
384 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
385 vhost_poll_queue(&vq->poll);
386 break;
387 }
388 }
389
390 mutex_unlock(&vq->mutex);
391}
392
393/* Expects to be always run from workqueue - which acts as
394 * read-side critical section for our kind of RCU. */
395static void handle_rx_mergeable(struct vhost_net *net)
396{ 291{
397 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; 292 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
398 unsigned uninitialized_var(in), log; 293 unsigned uninitialized_var(in), log;
@@ -405,19 +300,18 @@ static void handle_rx_mergeable(struct vhost_net *net)
405 .msg_iov = vq->iov, 300 .msg_iov = vq->iov,
406 .msg_flags = MSG_DONTWAIT, 301 .msg_flags = MSG_DONTWAIT,
407 }; 302 };
408
409 struct virtio_net_hdr_mrg_rxbuf hdr = { 303 struct virtio_net_hdr_mrg_rxbuf hdr = {
410 .hdr.flags = 0, 304 .hdr.flags = 0,
411 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE 305 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
412 }; 306 };
413
414 size_t total_len = 0; 307 size_t total_len = 0;
415 int err, headcount; 308 int err, headcount, mergeable;
416 size_t vhost_hlen, sock_hlen; 309 size_t vhost_hlen, sock_hlen;
417 size_t vhost_len, sock_len; 310 size_t vhost_len, sock_len;
418 /* TODO: check that we are running from vhost_worker? */ 311 /* TODO: check that we are running from vhost_worker? */
419 struct socket *sock = rcu_dereference_check(vq->private_data, 1); 312 struct socket *sock = rcu_dereference_check(vq->private_data, 1);
420 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) 313
314 if (!sock)
421 return; 315 return;
422 316
423 mutex_lock(&vq->mutex); 317 mutex_lock(&vq->mutex);
@@ -427,12 +321,14 @@ static void handle_rx_mergeable(struct vhost_net *net)
427 321
428 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? 322 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
429 vq->log : NULL; 323 vq->log : NULL;
324 mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
430 325
431 while ((sock_len = peek_head_len(sock->sk))) { 326 while ((sock_len = peek_head_len(sock->sk))) {
432 sock_len += sock_hlen; 327 sock_len += sock_hlen;
433 vhost_len = sock_len + vhost_hlen; 328 vhost_len = sock_len + vhost_hlen;
434 headcount = get_rx_bufs(vq, vq->heads, vhost_len, 329 headcount = get_rx_bufs(vq, vq->heads, vhost_len,
435 &in, vq_log, &log); 330 &in, vq_log, &log,
331 likely(mergeable) ? UIO_MAXIOV : 1);
436 /* On error, stop handling until the next kick. */ 332 /* On error, stop handling until the next kick. */
437 if (unlikely(headcount < 0)) 333 if (unlikely(headcount < 0))
438 break; 334 break;
@@ -476,7 +372,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
476 break; 372 break;
477 } 373 }
478 /* TODO: Should check and handle checksum. */ 374 /* TODO: Should check and handle checksum. */
479 if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && 375 if (likely(mergeable) &&
480 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, 376 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
481 offsetof(typeof(hdr), num_buffers), 377 offsetof(typeof(hdr), num_buffers),
482 sizeof hdr.num_buffers)) { 378 sizeof hdr.num_buffers)) {
@@ -498,14 +394,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
498 mutex_unlock(&vq->mutex); 394 mutex_unlock(&vq->mutex);
499} 395}
500 396
501static void handle_rx(struct vhost_net *net)
502{
503 if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
504 handle_rx_mergeable(net);
505 else
506 handle_rx_big(net);
507}
508
509static void handle_tx_kick(struct vhost_work *work) 397static void handle_tx_kick(struct vhost_work *work)
510{ 398{
511 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 399 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -654,6 +542,7 @@ static struct socket *get_raw_socket(int fd)
654 } uaddr; 542 } uaddr;
655 int uaddr_len = sizeof uaddr, r; 543 int uaddr_len = sizeof uaddr, r;
656 struct socket *sock = sockfd_lookup(fd, &r); 544 struct socket *sock = sockfd_lookup(fd, &r);
545
657 if (!sock) 546 if (!sock)
658 return ERR_PTR(-ENOTSOCK); 547 return ERR_PTR(-ENOTSOCK);
659 548
@@ -682,6 +571,7 @@ static struct socket *get_tap_socket(int fd)
682{ 571{
683 struct file *file = fget(fd); 572 struct file *file = fget(fd);
684 struct socket *sock; 573 struct socket *sock;
574
685 if (!file) 575 if (!file)
686 return ERR_PTR(-EBADF); 576 return ERR_PTR(-EBADF);
687 sock = tun_get_socket(file); 577 sock = tun_get_socket(file);
@@ -696,6 +586,7 @@ static struct socket *get_tap_socket(int fd)
696static struct socket *get_socket(int fd) 586static struct socket *get_socket(int fd)
697{ 587{
698 struct socket *sock; 588 struct socket *sock;
589
699 /* special case to disable backend */ 590 /* special case to disable backend */
700 if (fd == -1) 591 if (fd == -1)
701 return NULL; 592 return NULL;
@@ -741,9 +632,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
741 oldsock = rcu_dereference_protected(vq->private_data, 632 oldsock = rcu_dereference_protected(vq->private_data,
742 lockdep_is_held(&vq->mutex)); 633 lockdep_is_held(&vq->mutex));
743 if (sock != oldsock) { 634 if (sock != oldsock) {
744 vhost_net_disable_vq(n, vq); 635 vhost_net_disable_vq(n, vq);
745 rcu_assign_pointer(vq->private_data, sock); 636 rcu_assign_pointer(vq->private_data, sock);
746 vhost_net_enable_vq(n, vq); 637 vhost_net_enable_vq(n, vq);
747 } 638 }
748 639
749 mutex_unlock(&vq->mutex); 640 mutex_unlock(&vq->mutex);
@@ -768,6 +659,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
768 struct socket *tx_sock = NULL; 659 struct socket *tx_sock = NULL;
769 struct socket *rx_sock = NULL; 660 struct socket *rx_sock = NULL;
770 long err; 661 long err;
662
771 mutex_lock(&n->dev.mutex); 663 mutex_lock(&n->dev.mutex);
772 err = vhost_dev_check_owner(&n->dev); 664 err = vhost_dev_check_owner(&n->dev);
773 if (err) 665 if (err)
@@ -829,6 +721,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
829 struct vhost_vring_file backend; 721 struct vhost_vring_file backend;
830 u64 features; 722 u64 features;
831 int r; 723 int r;
724
832 switch (ioctl) { 725 switch (ioctl) {
833 case VHOST_NET_SET_BACKEND: 726 case VHOST_NET_SET_BACKEND:
834 if (copy_from_user(&backend, argp, sizeof backend)) 727 if (copy_from_user(&backend, argp, sizeof backend))
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ade0568c07a4..2ab291241635 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
41 poll_table *pt) 41 poll_table *pt)
42{ 42{
43 struct vhost_poll *poll; 43 struct vhost_poll *poll;
44 poll = container_of(pt, struct vhost_poll, table);
45 44
45 poll = container_of(pt, struct vhost_poll, table);
46 poll->wqh = wqh; 46 poll->wqh = wqh;
47 add_wait_queue(wqh, &poll->wait); 47 add_wait_queue(wqh, &poll->wait);
48} 48}
@@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
85void vhost_poll_start(struct vhost_poll *poll, struct file *file) 85void vhost_poll_start(struct vhost_poll *poll, struct file *file)
86{ 86{
87 unsigned long mask; 87 unsigned long mask;
88
88 mask = file->f_op->poll(file, &poll->table); 89 mask = file->f_op->poll(file, &poll->table);
89 if (mask) 90 if (mask)
90 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); 91 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
101 unsigned seq) 102 unsigned seq)
102{ 103{
103 int left; 104 int left;
105
104 spin_lock_irq(&dev->work_lock); 106 spin_lock_irq(&dev->work_lock);
105 left = seq - work->done_seq; 107 left = seq - work->done_seq;
106 spin_unlock_irq(&dev->work_lock); 108 spin_unlock_irq(&dev->work_lock);
@@ -222,6 +224,7 @@ static int vhost_worker(void *data)
222static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) 224static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
223{ 225{
224 int i; 226 int i;
227
225 for (i = 0; i < dev->nvqs; ++i) { 228 for (i = 0; i < dev->nvqs; ++i) {
226 dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * 229 dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
227 UIO_MAXIOV, GFP_KERNEL); 230 UIO_MAXIOV, GFP_KERNEL);
@@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
235 goto err_nomem; 238 goto err_nomem;
236 } 239 }
237 return 0; 240 return 0;
241
238err_nomem: 242err_nomem:
239 for (; i >= 0; --i) { 243 for (; i >= 0; --i) {
240 kfree(dev->vqs[i].indirect); 244 kfree(dev->vqs[i].indirect);
@@ -247,6 +251,7 @@ err_nomem:
247static void vhost_dev_free_iovecs(struct vhost_dev *dev) 251static void vhost_dev_free_iovecs(struct vhost_dev *dev)
248{ 252{
249 int i; 253 int i;
254
250 for (i = 0; i < dev->nvqs; ++i) { 255 for (i = 0; i < dev->nvqs; ++i) {
251 kfree(dev->vqs[i].indirect); 256 kfree(dev->vqs[i].indirect);
252 dev->vqs[i].indirect = NULL; 257 dev->vqs[i].indirect = NULL;
@@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
296} 301}
297 302
298struct vhost_attach_cgroups_struct { 303struct vhost_attach_cgroups_struct {
299 struct vhost_work work; 304 struct vhost_work work;
300 struct task_struct *owner; 305 struct task_struct *owner;
301 int ret; 306 int ret;
302}; 307};
303 308
304static void vhost_attach_cgroups_work(struct vhost_work *work) 309static void vhost_attach_cgroups_work(struct vhost_work *work)
305{ 310{
306 struct vhost_attach_cgroups_struct *s; 311 struct vhost_attach_cgroups_struct *s;
307 s = container_of(work, struct vhost_attach_cgroups_struct, work); 312
308 s->ret = cgroup_attach_task_all(s->owner, current); 313 s = container_of(work, struct vhost_attach_cgroups_struct, work);
314 s->ret = cgroup_attach_task_all(s->owner, current);
309} 315}
310 316
311static int vhost_attach_cgroups(struct vhost_dev *dev) 317static int vhost_attach_cgroups(struct vhost_dev *dev)
312{ 318{
313 struct vhost_attach_cgroups_struct attach; 319 struct vhost_attach_cgroups_struct attach;
314 attach.owner = current; 320
315 vhost_work_init(&attach.work, vhost_attach_cgroups_work); 321 attach.owner = current;
316 vhost_work_queue(dev, &attach.work); 322 vhost_work_init(&attach.work, vhost_attach_cgroups_work);
317 vhost_work_flush(dev, &attach.work); 323 vhost_work_queue(dev, &attach.work);
318 return attach.ret; 324 vhost_work_flush(dev, &attach.work);
325 return attach.ret;
319} 326}
320 327
321/* Caller should have device mutex */ 328/* Caller should have device mutex */
@@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
323{ 330{
324 struct task_struct *worker; 331 struct task_struct *worker;
325 int err; 332 int err;
333
326 /* Is there an owner already? */ 334 /* Is there an owner already? */
327 if (dev->mm) { 335 if (dev->mm) {
328 err = -EBUSY; 336 err = -EBUSY;
329 goto err_mm; 337 goto err_mm;
330 } 338 }
339
331 /* No owner, become one */ 340 /* No owner, become one */
332 dev->mm = get_task_mm(current); 341 dev->mm = get_task_mm(current);
333 worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); 342 worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
@@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
380void vhost_dev_cleanup(struct vhost_dev *dev) 389void vhost_dev_cleanup(struct vhost_dev *dev)
381{ 390{
382 int i; 391 int i;
392
383 for (i = 0; i < dev->nvqs; ++i) { 393 for (i = 0; i < dev->nvqs; ++i) {
384 if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { 394 if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
385 vhost_poll_stop(&dev->vqs[i].poll); 395 vhost_poll_stop(&dev->vqs[i].poll);
@@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
421static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) 431static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
422{ 432{
423 u64 a = addr / VHOST_PAGE_SIZE / 8; 433 u64 a = addr / VHOST_PAGE_SIZE / 8;
434
424 /* Make sure 64 bit math will not overflow. */ 435 /* Make sure 64 bit math will not overflow. */
425 if (a > ULONG_MAX - (unsigned long)log_base || 436 if (a > ULONG_MAX - (unsigned long)log_base ||
426 a + (unsigned long)log_base > ULONG_MAX) 437 a + (unsigned long)log_base > ULONG_MAX)
@@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
461 int log_all) 472 int log_all)
462{ 473{
463 int i; 474 int i;
475
464 for (i = 0; i < d->nvqs; ++i) { 476 for (i = 0; i < d->nvqs; ++i) {
465 int ok; 477 int ok;
466 mutex_lock(&d->vqs[i].mutex); 478 mutex_lock(&d->vqs[i].mutex);
@@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
527{ 539{
528 struct vhost_memory mem, *newmem, *oldmem; 540 struct vhost_memory mem, *newmem, *oldmem;
529 unsigned long size = offsetof(struct vhost_memory, regions); 541 unsigned long size = offsetof(struct vhost_memory, regions);
542
530 if (copy_from_user(&mem, m, size)) 543 if (copy_from_user(&mem, m, size))
531 return -EFAULT; 544 return -EFAULT;
532 if (mem.padding) 545 if (mem.padding)
@@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
544 return -EFAULT; 557 return -EFAULT;
545 } 558 }
546 559
547 if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) { 560 if (!memory_access_ok(d, newmem,
561 vhost_has_feature(d, VHOST_F_LOG_ALL))) {
548 kfree(newmem); 562 kfree(newmem);
549 return -EFAULT; 563 return -EFAULT;
550 } 564 }
@@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq,
560 struct vring_used __user *used) 574 struct vring_used __user *used)
561{ 575{
562 int r = put_user(vq->used_flags, &used->flags); 576 int r = put_user(vq->used_flags, &used->flags);
577
563 if (r) 578 if (r)
564 return r; 579 return r;
565 return get_user(vq->last_used_idx, &used->idx); 580 return get_user(vq->last_used_idx, &used->idx);
@@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
849{ 864{
850 struct vhost_memory_region *reg; 865 struct vhost_memory_region *reg;
851 int i; 866 int i;
867
852 /* linear search is not brilliant, but we really have on the order of 6 868 /* linear search is not brilliant, but we really have on the order of 6
853 * regions in practice */ 869 * regions in practice */
854 for (i = 0; i < mem->nregions; ++i) { 870 for (i = 0; i < mem->nregions; ++i) {
@@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr)
871 void *base; 887 void *base;
872 int bit = nr + (log % PAGE_SIZE) * 8; 888 int bit = nr + (log % PAGE_SIZE) * 8;
873 int r; 889 int r;
890
874 r = get_user_pages_fast(log, 1, 1, &page); 891 r = get_user_pages_fast(log, 1, 1, &page);
875 if (r < 0) 892 if (r < 0)
876 return r; 893 return r;
@@ -888,6 +905,7 @@ static int log_write(void __user *log_base,
888{ 905{
889 u64 write_page = write_address / VHOST_PAGE_SIZE; 906 u64 write_page = write_address / VHOST_PAGE_SIZE;
890 int r; 907 int r;
908
891 if (!write_length) 909 if (!write_length)
892 return 0; 910 return 0;
893 write_length += write_address % VHOST_PAGE_SIZE; 911 write_length += write_address % VHOST_PAGE_SIZE;
@@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
1037 i, count); 1055 i, count);
1038 return -EINVAL; 1056 return -EINVAL;
1039 } 1057 }
1040 if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect, 1058 if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
1041 sizeof desc))) { 1059 vq->indirect, sizeof desc))) {
1042 vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", 1060 vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
1043 i, (size_t)indirect->addr + i * sizeof desc); 1061 i, (size_t)indirect->addr + i * sizeof desc);
1044 return -EINVAL; 1062 return -EINVAL;
@@ -1153,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
1153 i, vq->num, head); 1171 i, vq->num, head);
1154 return -EINVAL; 1172 return -EINVAL;
1155 } 1173 }
1156 ret = copy_from_user(&desc, vq->desc + i, sizeof desc); 1174 ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
1157 if (unlikely(ret)) { 1175 if (unlikely(ret)) {
1158 vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", 1176 vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
1159 i, vq->desc + i); 1177 i, vq->desc + i);
@@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
1317void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) 1335void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
1318{ 1336{
1319 __u16 flags; 1337 __u16 flags;
1338
1320 /* Flush out used index updates. This is paired 1339 /* Flush out used index updates. This is paired
1321 * with the barrier that the Guest executes when enabling 1340 * with the barrier that the Guest executes when enabling
1322 * interrupts. */ 1341 * interrupts. */
@@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
1361{ 1380{
1362 u16 avail_idx; 1381 u16 avail_idx;
1363 int r; 1382 int r;
1383
1364 if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) 1384 if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
1365 return false; 1385 return false;
1366 vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; 1386 vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
@@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
1387void vhost_disable_notify(struct vhost_virtqueue *vq) 1407void vhost_disable_notify(struct vhost_virtqueue *vq)
1388{ 1408{
1389 int r; 1409 int r;
1410
1390 if (vq->used_flags & VRING_USED_F_NO_NOTIFY) 1411 if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
1391 return; 1412 return;
1392 vq->used_flags |= VRING_USED_F_NO_NOTIFY; 1413 vq->used_flags |= VRING_USED_F_NO_NOTIFY;