author    Linus Torvalds <torvalds@linux-foundation.org>  2014-06-11 20:08:16 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-06-11 20:08:16 -0400
commit    3c81bdd9e7ec5c7e28bedf7c7bd3b8911ffee94a (patch)
tree      84c162f7a2024fb7e74c3fc795d6dcffb4ae6b65 /drivers/vhost
parent    7ec6131b55184084d091953fad9e5c785c5b500b (diff)
parent    47283bef7ed356629467d1fac61687756e48f254 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull vhost infrastructure updates from Michael S. Tsirkin:
"This reworks vhost core dropping unnecessary RCU uses in favor of VQ
mutexes which are used on fast path anyway. This fixes worst-case
latency for users which change the memory mappings a lot. Memory
allocation for vhost-net now supports fallback on vmalloc (same as for
vhost-scsi) this makes it possible to create the device on systems
where memory is very fragmented, with slightly lower performance"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
vhost: move memory pointer to VQs
vhost: move acked_features to VQs
vhost: replace rcu with mutex
vhost-net: extend device allocation to vmalloc
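The core of the RCU-to-mutex conversion: acked_features and the memory table
move from struct vhost_dev into each struct vhost_virtqueue, whose mutex the
handlers already hold on the fast path, so readers need no extra
synchronization and the ioctl path simply takes each VQ mutex in turn instead
of stalling in synchronize_rcu().  A minimal userspace sketch of that pattern,
with pthread mutexes standing in for the kernel's struct mutex (all names
below are illustrative, not taken from the patches):

	#include <pthread.h>

	struct vq {
		pthread_mutex_t mutex;    /* held by the handler on the fast path */
		unsigned acked_features;  /* protected by mutex, as vq->acked_features is */
	};

	/* Fast path: the caller already holds vq->mutex, so a plain read suffices. */
	static int vq_has_feature(struct vq *vq, int bit)
	{
		return vq->acked_features & (1u << bit);
	}

	/* Slow path (ioctl): publish new features under each VQ mutex in turn;
	 * a handler then sees either the old or the new value, never a torn one. */
	static void dev_set_features(struct vq *vqs, int nvqs, unsigned features)
	{
		for (int i = 0; i < nvqs; ++i) {
			pthread_mutex_lock(&vqs[i].mutex);
			vqs[i].acked_features = features;
			pthread_mutex_unlock(&vqs[i].mutex);
		}
	}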
Diffstat (limited to 'drivers/vhost')
 drivers/vhost/net.c   | 35
 drivers/vhost/scsi.c  | 26
 drivers/vhost/test.c  | 11
 drivers/vhost/vhost.c | 97
 drivers/vhost/vhost.h | 19
 5 files changed, 101 insertions(+), 87 deletions(-)
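The net.c allocation change below follows a stock kernel pattern for
structures too big for a reliable kmalloc() on fragmented systems: try the
physically contiguous allocator first, quietly, then fall back to vmalloc(),
and free through a helper that checks which allocator produced the pointer.
A kernel-style sketch of just that pattern (function names are illustrative;
__GFP_REPEAT is the flag's spelling in kernels of this era, renamed later):

	#include <linux/slab.h>     /* kmalloc(), kfree() */
	#include <linux/vmalloc.h>  /* vmalloc(), vfree() */
	#include <linux/mm.h>       /* is_vmalloc_addr() */

	static void *big_alloc(size_t size)
	{
		/* __GFP_NOWARN: no failure splat, since we have a fallback;
		 * __GFP_REPEAT: retry a bit harder before giving up. */
		void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);

		if (!p)
			p = vmalloc(size);  /* no need for contiguous pages */
		return p;                   /* NULL only if both allocators failed */
	}

	static void big_free(void *p)
	{
		if (is_vmalloc_addr(p))     /* true only for vmalloc-range pointers */
			vfree(p);
		else
			kfree(p);           /* kfree(NULL) is a no-op */
	}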
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index be414d2b2b22..971a760af4a1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -17,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -373,7 +374,7 @@ static void handle_tx(struct vhost_net *net)
			      % UIO_MAXIOV == nvq->done_idx))
 			break;
 
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 NULL, NULL);
@@ -505,7 +506,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 			r = -ENOBUFS;
 			goto err;
 		}
-		r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg,
+		r = vhost_get_vq_desc(vq, vq->iov + seg,
				      ARRAY_SIZE(vq->iov) - seg, &out,
				      &in, log, log_num);
 		if (unlikely(r < 0))
@@ -584,9 +585,9 @@ static void handle_rx(struct vhost_net *net)
 	vhost_hlen = nvq->vhost_hlen;
 	sock_hlen = nvq->sock_hlen;
 
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+	vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
-	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
+	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -699,18 +700,30 @@ static void handle_rx_net(struct vhost_work *work)
 	handle_rx(net);
 }
 
+static void vhost_net_free(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
-	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	struct vhost_net *n;
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
 	int i;
 
-	if (!n)
-		return -ENOMEM;
+	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!n) {
+		n = vmalloc(sizeof *n);
+		if (!n)
+			return -ENOMEM;
+	}
 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
 	if (!vqs) {
-		kfree(n);
+		vhost_net_free(n);
 		return -ENOMEM;
 	}
 
@@ -827,7 +840,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
 	kfree(n->dev.vqs);
-	kfree(n);
+	vhost_net_free(n);
 	return 0;
 }
 
@@ -1038,15 +1051,13 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
 		mutex_unlock(&n->dev.mutex);
 		return -EFAULT;
 	}
-	n->dev.acked_features = features;
-	smp_wmb();
 	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
 		mutex_lock(&n->vqs[i].vq.mutex);
+		n->vqs[i].vq.acked_features = features;
 		n->vqs[i].vhost_hlen = vhost_hlen;
 		n->vqs[i].sock_hlen = sock_hlen;
 		mutex_unlock(&n->vqs[i].vq.mutex);
 	}
-	vhost_net_flush(n);
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 }
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index aeb513108448..e9c280f55819 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -606,7 +606,7 @@ tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
 
 again:
 	vhost_disable_notify(&vs->dev, vq);
-	head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+	head = vhost_get_vq_desc(vq, vq->iov,
 			ARRAY_SIZE(vq->iov), &out, &in,
 			NULL, NULL);
 	if (head < 0) {
@@ -945,7 +945,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
					ARRAY_SIZE(vq->iov), &out, &in,
					NULL, NULL);
 		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
@@ -1373,6 +1373,9 @@ err_dev:
 
 static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 {
+	struct vhost_virtqueue *vq;
+	int i;
+
 	if (features & ~VHOST_SCSI_FEATURES)
 		return -EOPNOTSUPP;
 
@@ -1382,9 +1385,13 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 		mutex_unlock(&vs->dev.mutex);
 		return -EFAULT;
 	}
-	vs->dev.acked_features = features;
-	smp_wmb();
-	vhost_scsi_flush(vs);
+
+	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+		vq = &vs->vqs[i].vq;
+		mutex_lock(&vq->mutex);
+		vq->acked_features = features;
+		mutex_unlock(&vq->mutex);
+	}
 	mutex_unlock(&vs->dev.mutex);
 	return 0;
 }
@@ -1591,10 +1598,6 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
 		return;
 
 	mutex_lock(&vs->dev.mutex);
-	if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) {
-		mutex_unlock(&vs->dev.mutex);
-		return;
-	}
 
 	if (plug)
 		reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
@@ -1603,8 +1606,9 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
 
 	vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
 	mutex_lock(&vq->mutex);
-	tcm_vhost_send_evt(vs, tpg, lun,
-			   VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
+	if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
+		tcm_vhost_send_evt(vs, tpg, lun,
+				   VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
 	mutex_unlock(&vq->mutex);
 	mutex_unlock(&vs->dev.mutex);
 }
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index c2a54fbf7f99..d9c501eaa6c3 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -53,7 +53,7 @@ static void handle_vq(struct vhost_test *n)
 	vhost_disable_notify(&n->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
+		head = vhost_get_vq_desc(vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 NULL, NULL);
@@ -241,15 +241,18 @@ done:
 
 static int vhost_test_set_features(struct vhost_test *n, u64 features)
 {
+	struct vhost_virtqueue *vq;
+
 	mutex_lock(&n->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&n->dev)) {
 		mutex_unlock(&n->dev.mutex);
 		return -EFAULT;
 	}
-	n->dev.acked_features = features;
-	smp_wmb();
-	vhost_test_flush(n);
+	vq = &n->vqs[VHOST_TEST_VQ];
+	mutex_lock(&vq->mutex);
+	vq->acked_features = features;
+	mutex_unlock(&vq->mutex);
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 78987e481bc6..c90f4374442a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -18,7 +18,6 @@
 #include <linux/mmu_context.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
@@ -191,6 +190,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_used = false;
 	vq->log_addr = -1ull;
 	vq->private_data = NULL;
+	vq->acked_features = 0;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
 	vq->error = NULL;
@@ -198,6 +198,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->call_ctx = NULL;
 	vq->call = NULL;
 	vq->log_ctx = NULL;
+	vq->memory = NULL;
 }
 
 static int vhost_worker(void *data)
@@ -415,11 +416,18 @@ EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
 /* Caller should have device mutex */
 void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
 {
+	int i;
+
 	vhost_dev_cleanup(dev, true);
 
 	/* Restore memory to default empty mapping. */
 	memory->nregions = 0;
-	RCU_INIT_POINTER(dev->memory, memory);
+	dev->memory = memory;
+	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
+	 * VQs aren't running.
+	 */
+	for (i = 0; i < dev->nvqs; ++i)
+		dev->vqs[i]->memory = memory;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
 
@@ -462,10 +470,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
 		fput(dev->log_file);
 	dev->log_file = NULL;
 	/* No one will access memory at this point */
-	kfree(rcu_dereference_protected(dev->memory,
-					locked ==
-						lockdep_is_held(&dev->mutex)));
-	RCU_INIT_POINTER(dev->memory, NULL);
+	kfree(dev->memory);
+	dev->memory = NULL;
 	WARN_ON(!list_empty(&dev->work_list));
 	if (dev->worker) {
 		kthread_stop(dev->worker);
@@ -524,11 +530,13 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 
 	for (i = 0; i < d->nvqs; ++i) {
 		int ok;
+		bool log;
+
 		mutex_lock(&d->vqs[i]->mutex);
+		log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
 		/* If ring is inactive, will check when it's enabled. */
 		if (d->vqs[i]->private_data)
-			ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
-						 log_all);
+			ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log);
 		else
 			ok = 1;
 		mutex_unlock(&d->vqs[i]->mutex);
@@ -538,12 +546,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 	return 1;
 }
 
-static int vq_access_ok(struct vhost_dev *d, unsigned int num,
+static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
			struct vring_desc __user *desc,
			struct vring_avail __user *avail,
			struct vring_used __user *used)
 {
-	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
 	       access_ok(VERIFY_READ, avail,
			 sizeof *avail + num * sizeof *avail->ring + s) &&
@@ -555,26 +563,19 @@ static int vq_access_ok(struct vhost_dev *d, unsigned int num,
 /* Caller should have device mutex but not vq mutex */
 int vhost_log_access_ok(struct vhost_dev *dev)
 {
-	struct vhost_memory *mp;
-
-	mp = rcu_dereference_protected(dev->memory,
-				       lockdep_is_held(&dev->mutex));
-	return memory_access_ok(dev, mp, 1);
+	return memory_access_ok(dev, dev->memory, 1);
 }
 EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+static int vq_log_access_ok(struct vhost_virtqueue *vq,
			    void __user *log_base)
 {
-	struct vhost_memory *mp;
-	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
-	mp = rcu_dereference_protected(vq->dev->memory,
-				       lockdep_is_held(&vq->mutex));
-	return vq_memory_access_ok(log_base, mp,
-				   vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
+	return vq_memory_access_ok(log_base, vq->memory,
+				   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
 	       (!vq->log_used || log_access_ok(log_base, vq->log_addr,
					sizeof *vq->used +
					vq->num * sizeof *vq->used->ring + s));
@@ -584,8 +585,8 @@ static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
 /* Caller should have vq mutex and device mutex */
 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-	return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
-		vq_log_access_ok(vq->dev, vq, vq->log_base);
+	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) &&
+		vq_log_access_ok(vq, vq->log_base);
 }
 EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
 
@@ -593,6 +594,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 {
 	struct vhost_memory mem, *newmem, *oldmem;
 	unsigned long size = offsetof(struct vhost_memory, regions);
+	int i;
 
 	if (copy_from_user(&mem, m, size))
 		return -EFAULT;
@@ -611,15 +613,19 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 		return -EFAULT;
 	}
 
-	if (!memory_access_ok(d, newmem,
-			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+	if (!memory_access_ok(d, newmem, 0)) {
 		kfree(newmem);
 		return -EFAULT;
 	}
-	oldmem = rcu_dereference_protected(d->memory,
-					   lockdep_is_held(&d->mutex));
-	rcu_assign_pointer(d->memory, newmem);
-	synchronize_rcu();
+	oldmem = d->memory;
+	d->memory = newmem;
+
+	/* All memory accesses are done under some VQ mutex. */
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
+		d->vqs[i]->memory = newmem;
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 	kfree(oldmem);
 	return 0;
 }
@@ -718,7 +724,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
		 * If it is not, we don't as size might not have been setup.
		 * We will verify when backend is configured. */
		if (vq->private_data) {
-			if (!vq_access_ok(d, vq->num,
+			if (!vq_access_ok(vq, vq->num,
				(void __user *)(unsigned long)a.desc_user_addr,
				(void __user *)(unsigned long)a.avail_user_addr,
				(void __user *)(unsigned long)a.used_user_addr)) {
@@ -858,7 +864,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
			vq = d->vqs[i];
			mutex_lock(&vq->mutex);
			/* If ring is inactive, will check when it's enabled. */
-			if (vq->private_data && !vq_log_access_ok(d, vq, base))
+			if (vq->private_data && !vq_log_access_ok(vq, base))
				r = -EFAULT;
			else
				vq->log_base = base;
@@ -1044,7 +1050,7 @@ int vhost_init_used(struct vhost_virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(vhost_init_used);
 
-static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
			  struct iovec iov[], int iov_size)
 {
 	const struct vhost_memory_region *reg;
@@ -1053,9 +1059,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
 	u64 s = 0;
 	int ret = 0;
 
-	rcu_read_lock();
-
-	mem = rcu_dereference(dev->memory);
+	mem = vq->memory;
 	while ((u64)len > s) {
 		u64 size;
 		if (unlikely(ret >= iov_size)) {
@@ -1077,7 +1081,6 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
 		++ret;
 	}
 
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -1102,7 +1105,7 @@ static unsigned next_desc(struct vring_desc *desc)
 	return next;
 }
 
-static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+static int get_indirect(struct vhost_virtqueue *vq,
			struct iovec iov[], unsigned int iov_size,
			unsigned int *out_num, unsigned int *in_num,
			struct vhost_log *log, unsigned int *log_num,
@@ -1121,7 +1124,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		return -EINVAL;
 	}
 
-	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+	ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect,
			     UIO_MAXIOV);
 	if (unlikely(ret < 0)) {
 		vq_err(vq, "Translation failure %d in indirect.\n", ret);
@@ -1161,7 +1164,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
			return -EINVAL;
		}
 
-		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+		ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
				     iov_size - iov_count);
		if (unlikely(ret < 0)) {
			vq_err(vq, "Translation failure %d indirect idx %d\n",
@@ -1198,7 +1201,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 * This function returns the descriptor number found, or vq->num (which is
 * never a valid descriptor number) if none was found.  A negative code is
 * returned on error. */
-int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
		      struct iovec iov[], unsigned int iov_size,
		      unsigned int *out_num, unsigned int *in_num,
		      struct vhost_log *log, unsigned int *log_num)
@@ -1272,7 +1275,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
			return -EFAULT;
		}
		if (desc.flags & VRING_DESC_F_INDIRECT) {
-			ret = get_indirect(dev, vq, iov, iov_size,
+			ret = get_indirect(vq, iov, iov_size,
					   out_num, in_num,
					   log, log_num, &desc);
			if (unlikely(ret < 0)) {
@@ -1283,7 +1286,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
			continue;
		}
 
-		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+		ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
				     iov_size - iov_count);
		if (unlikely(ret < 0)) {
			vq_err(vq, "Translation failure %d descriptor idx %d\n",
@@ -1426,11 +1429,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
	 * interrupts. */
 	smp_mb();
 
-	if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+	if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
	    unlikely(vq->avail_idx == vq->last_avail_idx))
		return true;
 
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
		__u16 flags;
		if (__get_user(flags, &vq->avail->flags)) {
			vq_err(vq, "Failed to get flags");
@@ -1491,7 +1494,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
		r = vhost_update_used_flags(vq);
		if (r) {
			vq_err(vq, "Failed to enable notification at %p: %d\n",
@@ -1528,7 +1531,7 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
		r = vhost_update_used_flags(vq);
		if (r)
			vq_err(vq, "Failed to enable notification at %p: %d\n",
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 35eeb2a1bada..3eda654b8f5a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -104,20 +104,18 @@ struct vhost_virtqueue {
 	struct iovec *indirect;
 	struct vring_used_elem *heads;
 	/* Protected by virtqueue mutex. */
+	struct vhost_memory *memory;
 	void *private_data;
+	unsigned acked_features;
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
 };
 
 struct vhost_dev {
-	/* Readers use RCU to access memory table pointer
-	 * log base pointer and features.
-	 * Writers use mutex below.*/
-	struct vhost_memory __rcu *memory;
+	struct vhost_memory *memory;
 	struct mm_struct *mm;
 	struct mutex mutex;
-	unsigned acked_features;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct file *log_file;
@@ -140,7 +138,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+int vhost_get_vq_desc(struct vhost_virtqueue *,
		      struct iovec iov[], unsigned int iov_count,
		      unsigned int *out_num, unsigned int *in_num,
		      struct vhost_log *log, unsigned int *log_num);
@@ -174,13 +172,8 @@ enum {
			 (1ULL << VHOST_F_LOG_ALL),
 };
 
-static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
+static inline int vhost_has_feature(struct vhost_virtqueue *vq, int bit)
 {
-	unsigned acked_features;
-
-	/* TODO: check that we are running from vhost_worker or dev mutex is
-	 * held? */
-	acked_features = rcu_dereference_index_check(dev->acked_features, 1);
-	return acked_features & (1 << bit);
+	return vq->acked_features & (1 << bit);
 }
 #endif