author	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-11 20:08:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-11 20:08:16 -0400
commit	3c81bdd9e7ec5c7e28bedf7c7bd3b8911ffee94a (patch)
tree	84c162f7a2024fb7e74c3fc795d6dcffb4ae6b65 /drivers/vhost
parent	7ec6131b55184084d091953fad9e5c785c5b500b (diff)
parent	47283bef7ed356629467d1fac61687756e48f254 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull vhost infrastructure updates from Michael S. Tsirkin:
 "This reworks vhost core, dropping unnecessary RCU uses in favor of VQ
  mutexes which are used on the fast path anyway. This fixes worst-case
  latency for users which change the memory mappings a lot.

  Memory allocation for vhost-net now supports fallback on vmalloc (same
  as for vhost-scsi); this makes it possible to create the device on
  systems where memory is very fragmented, with slightly lower
  performance."

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost: move memory pointer to VQs
  vhost: move acked_features to VQs
  vhost: replace rcu with mutex
  vhost-net: extend device allocation to vmalloc
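The vmalloc fallback mentioned above follows a common kernel allocation pattern: try kmalloc first without warning on failure, fall back to vmalloc when contiguous memory is unavailable, and free with the routine that matches the allocator. A minimal sketch of that pattern (example_alloc()/example_free() are illustrative names, not the helpers added by this series; the actual vhost-net code is in the diff below):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>		/* is_vmalloc_addr() */

/* Prefer physically contiguous memory; don't warn if it isn't available. */
static void *example_alloc(size_t size)
{
	void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);

	if (!p)
		p = vmalloc(size);	/* fragmented memory: take the slower path */
	return p;
}

/* Free with whichever allocator actually provided the memory. */
static void example_free(void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}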
Diffstat (limited to 'drivers/vhost')
-rw-r--r--	drivers/vhost/net.c	35
-rw-r--r--	drivers/vhost/scsi.c	26
-rw-r--r--	drivers/vhost/test.c	11
-rw-r--r--	drivers/vhost/vhost.c	97
-rw-r--r--	drivers/vhost/vhost.h	19
5 files changed, 101 insertions(+), 87 deletions(-)
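The other half of the series replaces RCU-protected device-wide state with per-virtqueue copies: writers hold the device mutex and update each VQ's copy under that VQ's mutex, while fast-path readers simply use the field under the VQ mutex they already hold. A rough sketch of that access pattern, assuming the post-patch struct vhost_virtqueue/vhost_dev fields shown in the diff below (the example_* names are illustrative, not the kernel's):

/* Fast path: the caller already holds vq->mutex, so a plain read is enough. */
static inline int example_has_feature(struct vhost_virtqueue *vq, int bit)
{
	return vq->acked_features & (1 << bit);
}

/* Slow path: the caller holds the device mutex and updates every VQ copy. */
static void example_set_features(struct vhost_dev *dev, u64 features)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		mutex_lock(&dev->vqs[i]->mutex);
		dev->vqs[i]->acked_features = features;
		mutex_unlock(&dev->vqs[i]->mutex);
	}
}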
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index be414d2b2b22..971a760af4a1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -17,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -373,7 +374,7 @@ static void handle_tx(struct vhost_net *net)
 			      % UIO_MAXIOV == nvq->done_idx))
 			break;
 
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
 					 NULL, NULL);
@@ -505,7 +506,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 			r = -ENOBUFS;
 			goto err;
 		}
-		r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg,
+		r = vhost_get_vq_desc(vq, vq->iov + seg,
 				      ARRAY_SIZE(vq->iov) - seg, &out,
 				      &in, log, log_num);
 		if (unlikely(r < 0))
@@ -584,9 +585,9 @@ static void handle_rx(struct vhost_net *net)
 	vhost_hlen = nvq->vhost_hlen;
 	sock_hlen = nvq->sock_hlen;
 
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+	vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
-	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
+	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -699,18 +700,30 @@ static void handle_rx_net(struct vhost_work *work)
 	handle_rx(net);
 }
 
+static void vhost_net_free(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
-	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	struct vhost_net *n;
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
 	int i;
 
-	if (!n)
-		return -ENOMEM;
+	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!n) {
+		n = vmalloc(sizeof *n);
+		if (!n)
+			return -ENOMEM;
+	}
 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
 	if (!vqs) {
-		kfree(n);
+		vhost_net_free(n);
 		return -ENOMEM;
 	}
 
@@ -827,7 +840,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
 	kfree(n->dev.vqs);
-	kfree(n);
+	vhost_net_free(n);
 	return 0;
 }
 
@@ -1038,15 +1051,13 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
 		mutex_unlock(&n->dev.mutex);
 		return -EFAULT;
 	}
-	n->dev.acked_features = features;
-	smp_wmb();
 	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
 		mutex_lock(&n->vqs[i].vq.mutex);
+		n->vqs[i].vq.acked_features = features;
 		n->vqs[i].vhost_hlen = vhost_hlen;
 		n->vqs[i].sock_hlen = sock_hlen;
 		mutex_unlock(&n->vqs[i].vq.mutex);
 	}
-	vhost_net_flush(n);
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 }
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index aeb513108448..e9c280f55819 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -606,7 +606,7 @@ tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
 
 again:
 	vhost_disable_notify(&vs->dev, vq);
-	head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+	head = vhost_get_vq_desc(vq, vq->iov,
 			ARRAY_SIZE(vq->iov), &out, &in,
 			NULL, NULL);
 	if (head < 0) {
@@ -945,7 +945,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
 					ARRAY_SIZE(vq->iov), &out, &in,
 					NULL, NULL);
 		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
@@ -1373,6 +1373,9 @@ err_dev:
 
 static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 {
+	struct vhost_virtqueue *vq;
+	int i;
+
 	if (features & ~VHOST_SCSI_FEATURES)
 		return -EOPNOTSUPP;
 
@@ -1382,9 +1385,13 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 		mutex_unlock(&vs->dev.mutex);
 		return -EFAULT;
 	}
-	vs->dev.acked_features = features;
-	smp_wmb();
-	vhost_scsi_flush(vs);
+
+	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+		vq = &vs->vqs[i].vq;
+		mutex_lock(&vq->mutex);
+		vq->acked_features = features;
+		mutex_unlock(&vq->mutex);
+	}
 	mutex_unlock(&vs->dev.mutex);
 	return 0;
 }
@@ -1591,10 +1598,6 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
 		return;
 
 	mutex_lock(&vs->dev.mutex);
-	if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) {
-		mutex_unlock(&vs->dev.mutex);
-		return;
-	}
 
 	if (plug)
 		reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
@@ -1603,8 +1606,9 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
 
 	vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
 	mutex_lock(&vq->mutex);
-	tcm_vhost_send_evt(vs, tpg, lun,
-			VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
+	if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
+		tcm_vhost_send_evt(vs, tpg, lun,
+				   VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
 	mutex_unlock(&vq->mutex);
 	mutex_unlock(&vs->dev.mutex);
 }
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index c2a54fbf7f99..d9c501eaa6c3 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -53,7 +53,7 @@ static void handle_vq(struct vhost_test *n)
 	vhost_disable_notify(&n->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
 					 NULL, NULL);
@@ -241,15 +241,18 @@ done:
 
 static int vhost_test_set_features(struct vhost_test *n, u64 features)
 {
+	struct vhost_virtqueue *vq;
+
 	mutex_lock(&n->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&n->dev)) {
 		mutex_unlock(&n->dev.mutex);
 		return -EFAULT;
 	}
-	n->dev.acked_features = features;
-	smp_wmb();
-	vhost_test_flush(n);
+	vq = &n->vqs[VHOST_TEST_VQ];
+	mutex_lock(&vq->mutex);
+	vq->acked_features = features;
+	mutex_unlock(&vq->mutex);
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 78987e481bc6..c90f4374442a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -18,7 +18,6 @@
 #include <linux/mmu_context.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
@@ -191,6 +190,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_used = false;
 	vq->log_addr = -1ull;
 	vq->private_data = NULL;
+	vq->acked_features = 0;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
 	vq->error = NULL;
@@ -198,6 +198,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->call_ctx = NULL;
 	vq->call = NULL;
 	vq->log_ctx = NULL;
+	vq->memory = NULL;
 }
 
 static int vhost_worker(void *data)
@@ -415,11 +416,18 @@ EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
 /* Caller should have device mutex */
 void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
 {
+	int i;
+
 	vhost_dev_cleanup(dev, true);
 
 	/* Restore memory to default empty mapping. */
 	memory->nregions = 0;
-	RCU_INIT_POINTER(dev->memory, memory);
+	dev->memory = memory;
+	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
+	 * VQs aren't running.
+	 */
+	for (i = 0; i < dev->nvqs; ++i)
+		dev->vqs[i]->memory = memory;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
 
@@ -462,10 +470,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
 	fput(dev->log_file);
 	dev->log_file = NULL;
 	/* No one will access memory at this point */
-	kfree(rcu_dereference_protected(dev->memory,
-					locked ==
-						lockdep_is_held(&dev->mutex)));
-	RCU_INIT_POINTER(dev->memory, NULL);
+	kfree(dev->memory);
+	dev->memory = NULL;
 	WARN_ON(!list_empty(&dev->work_list));
 	if (dev->worker) {
 		kthread_stop(dev->worker);
@@ -524,11 +530,13 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 
 	for (i = 0; i < d->nvqs; ++i) {
 		int ok;
+		bool log;
+
 		mutex_lock(&d->vqs[i]->mutex);
+		log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
 		/* If ring is inactive, will check when it's enabled. */
 		if (d->vqs[i]->private_data)
-			ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
-						 log_all);
+			ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log);
 		else
 			ok = 1;
 		mutex_unlock(&d->vqs[i]->mutex);
@@ -538,12 +546,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 	return 1;
 }
 
-static int vq_access_ok(struct vhost_dev *d, unsigned int num,
+static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			struct vring_desc __user *desc,
 			struct vring_avail __user *avail,
 			struct vring_used __user *used)
 {
-	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
 	       access_ok(VERIFY_READ, avail,
 			 sizeof *avail + num * sizeof *avail->ring + s) &&
@@ -555,26 +563,19 @@ static int vq_access_ok(struct vhost_dev *d, unsigned int num,
 /* Caller should have device mutex but not vq mutex */
 int vhost_log_access_ok(struct vhost_dev *dev)
 {
-	struct vhost_memory *mp;
-
-	mp = rcu_dereference_protected(dev->memory,
-				       lockdep_is_held(&dev->mutex));
-	return memory_access_ok(dev, mp, 1);
+	return memory_access_ok(dev, dev->memory, 1);
 }
 EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+static int vq_log_access_ok(struct vhost_virtqueue *vq,
 			    void __user *log_base)
 {
-	struct vhost_memory *mp;
-	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
-	mp = rcu_dereference_protected(vq->dev->memory,
-				       lockdep_is_held(&vq->mutex));
-	return vq_memory_access_ok(log_base, mp,
-			    vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
+	return vq_memory_access_ok(log_base, vq->memory,
+				   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
 		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
 					sizeof *vq->used +
 					vq->num * sizeof *vq->used->ring + s));
@@ -584,8 +585,8 @@ static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
 /* Caller should have vq mutex and device mutex */
 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-	return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
-		vq_log_access_ok(vq->dev, vq, vq->log_base);
+	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) &&
+		vq_log_access_ok(vq, vq->log_base);
 }
 EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
 
@@ -593,6 +594,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 {
 	struct vhost_memory mem, *newmem, *oldmem;
 	unsigned long size = offsetof(struct vhost_memory, regions);
+	int i;
 
 	if (copy_from_user(&mem, m, size))
 		return -EFAULT;
@@ -611,15 +613,19 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 		return -EFAULT;
 	}
 
-	if (!memory_access_ok(d, newmem,
-			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+	if (!memory_access_ok(d, newmem, 0)) {
 		kfree(newmem);
 		return -EFAULT;
 	}
-	oldmem = rcu_dereference_protected(d->memory,
-					   lockdep_is_held(&d->mutex));
-	rcu_assign_pointer(d->memory, newmem);
-	synchronize_rcu();
+	oldmem = d->memory;
+	d->memory = newmem;
+
+	/* All memory accesses are done under some VQ mutex. */
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
+		d->vqs[i]->memory = newmem;
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 	kfree(oldmem);
 	return 0;
 }
@@ -718,7 +724,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 		 * If it is not, we don't as size might not have been setup.
 		 * We will verify when backend is configured. */
 		if (vq->private_data) {
-			if (!vq_access_ok(d, vq->num,
+			if (!vq_access_ok(vq, vq->num,
 				(void __user *)(unsigned long)a.desc_user_addr,
 				(void __user *)(unsigned long)a.avail_user_addr,
 				(void __user *)(unsigned long)a.used_user_addr)) {
@@ -858,7 +864,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
 			vq = d->vqs[i];
 			mutex_lock(&vq->mutex);
 			/* If ring is inactive, will check when it's enabled. */
-			if (vq->private_data && !vq_log_access_ok(d, vq, base))
+			if (vq->private_data && !vq_log_access_ok(vq, base))
 				r = -EFAULT;
 			else
 				vq->log_base = base;
@@ -1044,7 +1050,7 @@ int vhost_init_used(struct vhost_virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(vhost_init_used);
 
-static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
 			  struct iovec iov[], int iov_size)
 {
 	const struct vhost_memory_region *reg;
@@ -1053,9 +1059,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
 	u64 s = 0;
 	int ret = 0;
 
-	rcu_read_lock();
-
-	mem = rcu_dereference(dev->memory);
+	mem = vq->memory;
 	while ((u64)len > s) {
 		u64 size;
 		if (unlikely(ret >= iov_size)) {
@@ -1077,7 +1081,6 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
 		++ret;
 	}
 
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -1102,7 +1105,7 @@ static unsigned next_desc(struct vring_desc *desc)
 	return next;
 }
 
-static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+static int get_indirect(struct vhost_virtqueue *vq,
 			struct iovec iov[], unsigned int iov_size,
 			unsigned int *out_num, unsigned int *in_num,
 			struct vhost_log *log, unsigned int *log_num,
@@ -1121,7 +1124,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		return -EINVAL;
 	}
 
-	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+	ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect,
 			     UIO_MAXIOV);
 	if (unlikely(ret < 0)) {
 		vq_err(vq, "Translation failure %d in indirect.\n", ret);
@@ -1161,7 +1164,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			return -EINVAL;
 		}
 
-		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+		ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
 				     iov_size - iov_count);
 		if (unlikely(ret < 0)) {
 			vq_err(vq, "Translation failure %d indirect idx %d\n",
@@ -1198,7 +1201,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
  * This function returns the descriptor number found, or vq->num (which is
  * never a valid descriptor number) if none was found. A negative code is
  * returned on error. */
-int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 		      struct iovec iov[], unsigned int iov_size,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num)
@@ -1272,7 +1275,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			return -EFAULT;
 		}
 		if (desc.flags & VRING_DESC_F_INDIRECT) {
-			ret = get_indirect(dev, vq, iov, iov_size,
+			ret = get_indirect(vq, iov, iov_size,
 					   out_num, in_num,
 					   log, log_num, &desc);
 			if (unlikely(ret < 0)) {
@@ -1283,7 +1286,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			continue;
 		}
 
-		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+		ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
 				     iov_size - iov_count);
 		if (unlikely(ret < 0)) {
 			vq_err(vq, "Translation failure %d descriptor idx %d\n",
@@ -1426,11 +1429,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	 * interrupts. */
 	smp_mb();
 
-	if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+	if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
 	    unlikely(vq->avail_idx == vq->last_avail_idx))
 		return true;
 
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		__u16 flags;
 		if (__get_user(flags, &vq->avail->flags)) {
 			vq_err(vq, "Failed to get flags");
@@ -1491,7 +1494,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		r = vhost_update_used_flags(vq);
 		if (r) {
 			vq_err(vq, "Failed to enable notification at %p: %d\n",
@@ -1528,7 +1531,7 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		r = vhost_update_used_flags(vq);
 		if (r)
 			vq_err(vq, "Failed to enable notification at %p: %d\n",
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 35eeb2a1bada..3eda654b8f5a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -104,20 +104,18 @@ struct vhost_virtqueue {
 	struct iovec *indirect;
 	struct vring_used_elem *heads;
 	/* Protected by virtqueue mutex. */
+	struct vhost_memory *memory;
 	void *private_data;
+	unsigned acked_features;
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
 };
 
 struct vhost_dev {
-	/* Readers use RCU to access memory table pointer
-	 * log base pointer and features.
-	 * Writers use mutex below.*/
-	struct vhost_memory __rcu *memory;
+	struct vhost_memory *memory;
 	struct mm_struct *mm;
 	struct mutex mutex;
-	unsigned acked_features;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct file *log_file;
@@ -140,7 +138,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num);
@@ -174,13 +172,8 @@ enum {
 	 (1ULL << VHOST_F_LOG_ALL),
 };
 
-static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
+static inline int vhost_has_feature(struct vhost_virtqueue *vq, int bit)
 {
-	unsigned acked_features;
-
-	/* TODO: check that we are running from vhost_worker or dev mutex is
-	 * held? */
-	acked_features = rcu_dereference_index_check(dev->acked_features, 1);
-	return acked_features & (1 << bit);
+	return vq->acked_features & (1 << bit);
 }
 #endif