author     Jason Wang <jasowang@redhat.com>          2019-05-24 04:12:18 -0400
committer  Michael S. Tsirkin <mst@redhat.com>       2019-06-05 21:09:18 -0400
commit     7f466032dc9e5a61217f22ea34b2df932786bbfc (patch)
tree       9c573d71ddb93b2b287865d8d6c4b346a8d36e46 /drivers/vhost/vhost.c
parent     feebcaeac79ad86fb289ef55fa92f4a97ab8314e (diff)
vhost: access vq metadata through kernel virtual address
It was noticed that the copy_to/from_user() friends used to access virtqueue metadata tend to be very expensive for a dataplane implementation like vhost, since they involve lots of software checks, speculation barriers and hardware feature toggling (e.g. SMAP). The extra cost is more noticeable when transferring small packets, since the time spent on metadata access becomes more significant.

This patch tries to eliminate those overheads by accessing the metadata through a direct kernel mapping of its pages. Invalidation callbacks are implemented to cooperate with general VM management (swap, KSM, THP or NUMA balancing). Before each round of packet processing we try to get the direct mapping of the vq metadata if it does not already exist; if that fails, we simply fall back to the copy_to/from_user() friends.

Invalidation and direct-mapping access are synchronized through a spinlock and RCU: all metadata accesses through the direct map are protected by RCU, while setup and invalidation are done under the spinlock.

This method does not work for highmem pages, which require a temporary mapping, so for those we just fall back to the normal copy_to/from_user(). It is also not used on architectures with virtually tagged caches, since extra cache flushing would be needed to eliminate aliases, which would mean complex logic and bad performance; for those architectures the patch simply keeps using the copy_to/from_user() friends. This is done by ruling out the kernel-mapping code through ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE.

Note that this is only done when a device IOTLB is not enabled. A similar method could be used to optimize the IOTLB path in the future.

Tests show at most about a 23% improvement in TX PPS when using virtio-user + vhost_net + xdp1 + TAP on a 2.6GHz Broadwell:

                 SMAP on | SMAP off
        Before:  5.2Mpps | 7.1Mpps
        After:   6.4Mpps | 8.2Mpps

Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Miller <davem@davemloft.net>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-parisc@vger.kernel.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
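The synchronization scheme described above can be summarized with a short, self-contained sketch. This is illustrative only: demo_vq, demo_map, demo_get_used_idx() and demo_invalidate() are hypothetical names standing in for the real vhost_virtqueue/vhost_map machinery added by the patch. The point is the RCU-protected fast path with a checked-userspace-access fallback, and teardown under a spinlock followed by synchronize_rcu().

/*
 * Illustrative sketch only (not part of the patch): demo_vq, demo_map,
 * demo_get_used_idx() and demo_invalidate() are made-up names.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct demo_map {
        void *addr;                     /* kernel VA of the pinned metadata */
};

struct demo_vq {
        spinlock_t mmu_lock;            /* serializes setup/invalidation */
        struct demo_map __rcu *map;     /* NULL means "not prefetched" */
        __u16 __user *used_idx_uptr;    /* userspace fallback address */
};

/* Fast path: read through the direct map when it is present. */
static int demo_get_used_idx(struct demo_vq *vq, __u16 *idx)
{
        struct demo_map *map;

        rcu_read_lock();
        map = rcu_dereference(vq->map);
        if (likely(map)) {
                *idx = *(__u16 *)map->addr;
                rcu_read_unlock();
                return 0;
        }
        rcu_read_unlock();

        /* Slow path: ordinary, fully checked userspace access. */
        return get_user(*idx, vq->used_idx_uptr);
}

/* Invalidation side: drop the map under the lock, then wait for readers. */
static void demo_invalidate(struct demo_vq *vq)
{
        struct demo_map *map;

        spin_lock(&vq->mmu_lock);
        map = rcu_dereference_protected(vq->map,
                                        lockdep_is_held(&vq->mmu_lock));
        rcu_assign_pointer(vq->map, NULL);
        spin_unlock(&vq->mmu_lock);

        if (map) {
                synchronize_rcu();
                kfree(map);
        }
}

The real patch applies this pattern per metadata area (desc/avail/used) of each virtqueue, with the MMU notifier's invalidate_range_start/end callbacks driving the teardown path.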
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r--  drivers/vhost/vhost.c | 515
1 file changed, 512 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e27d1da5f979..dc9301d31f12 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -299,6 +299,160 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
                 __vhost_vq_meta_reset(d->vqs[i]);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_map_unprefetch(struct vhost_map *map)
+{
+        kfree(map->pages);
+        map->pages = NULL;
+        map->npages = 0;
+        map->addr = NULL;
+}
+
+static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
+{
+        struct vhost_map *map[VHOST_NUM_ADDRS];
+        int i;
+
+        spin_lock(&vq->mmu_lock);
+        for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+                map[i] = rcu_dereference_protected(vq->maps[i],
+                                  lockdep_is_held(&vq->mmu_lock));
+                if (map[i])
+                        rcu_assign_pointer(vq->maps[i], NULL);
+        }
+        spin_unlock(&vq->mmu_lock);
+
+        synchronize_rcu();
+
+        for (i = 0; i < VHOST_NUM_ADDRS; i++)
+                if (map[i])
+                        vhost_map_unprefetch(map[i]);
+
+}
+
+static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
+{
+        int i;
+
+        vhost_uninit_vq_maps(vq);
+        for (i = 0; i < VHOST_NUM_ADDRS; i++)
+                vq->uaddrs[i].size = 0;
+}
+
+static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
+                                    unsigned long start,
+                                    unsigned long end)
+{
+        if (unlikely(!uaddr->size))
+                return false;
+
+        return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
+}
+
+static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
+                                      int index,
+                                      unsigned long start,
+                                      unsigned long end)
+{
+        struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+        struct vhost_map *map;
+        int i;
+
+        if (!vhost_map_range_overlap(uaddr, start, end))
+                return;
+
+        spin_lock(&vq->mmu_lock);
+        ++vq->invalidate_count;
+
+        map = rcu_dereference_protected(vq->maps[index],
+                                        lockdep_is_held(&vq->mmu_lock));
+        if (map) {
+                if (uaddr->write) {
+                        for (i = 0; i < map->npages; i++)
+                                set_page_dirty(map->pages[i]);
+                }
+                rcu_assign_pointer(vq->maps[index], NULL);
+        }
+        spin_unlock(&vq->mmu_lock);
+
+        if (map) {
+                synchronize_rcu();
+                vhost_map_unprefetch(map);
+        }
+}
+
+static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
+                                    int index,
+                                    unsigned long start,
+                                    unsigned long end)
+{
+        if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
+                return;
+
+        spin_lock(&vq->mmu_lock);
+        --vq->invalidate_count;
+        spin_unlock(&vq->mmu_lock);
+}
+
+static int vhost_invalidate_range_start(struct mmu_notifier *mn,
+                                        const struct mmu_notifier_range *range)
+{
+        struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+                                             mmu_notifier);
+        int i, j;
+
+        if (!mmu_notifier_range_blockable(range))
+                return -EAGAIN;
+
+        for (i = 0; i < dev->nvqs; i++) {
+                struct vhost_virtqueue *vq = dev->vqs[i];
+
+                for (j = 0; j < VHOST_NUM_ADDRS; j++)
+                        vhost_invalidate_vq_start(vq, j,
+                                                  range->start,
+                                                  range->end);
+        }
+
+        return 0;
+}
+
+static void vhost_invalidate_range_end(struct mmu_notifier *mn,
+                                       const struct mmu_notifier_range *range)
+{
+        struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+                                             mmu_notifier);
+        int i, j;
+
+        for (i = 0; i < dev->nvqs; i++) {
+                struct vhost_virtqueue *vq = dev->vqs[i];
+
+                for (j = 0; j < VHOST_NUM_ADDRS; j++)
+                        vhost_invalidate_vq_end(vq, j,
+                                                range->start,
+                                                range->end);
+        }
+}
+
+static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
+        .invalidate_range_start = vhost_invalidate_range_start,
+        .invalidate_range_end = vhost_invalidate_range_end,
+};
+
+static void vhost_init_maps(struct vhost_dev *dev)
+{
+        struct vhost_virtqueue *vq;
+        int i, j;
+
+        dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
+
+        for (i = 0; i < dev->nvqs; ++i) {
+                vq = dev->vqs[i];
+                for (j = 0; j < VHOST_NUM_ADDRS; j++)
+                        RCU_INIT_POINTER(vq->maps[j], NULL);
+        }
+}
+#endif
+
 static void vhost_vq_reset(struct vhost_dev *dev,
                            struct vhost_virtqueue *vq)
 {
@@ -327,7 +481,11 @@ static void vhost_vq_reset(struct vhost_dev *dev,
         vq->busyloop_timeout = 0;
         vq->umem = NULL;
         vq->iotlb = NULL;
+        vq->invalidate_count = 0;
         __vhost_vq_meta_reset(vq);
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        vhost_reset_vq_maps(vq);
+#endif
 }
 
 static int vhost_worker(void *data)
@@ -477,7 +635,9 @@ void vhost_dev_init(struct vhost_dev *dev,
         INIT_LIST_HEAD(&dev->read_list);
         INIT_LIST_HEAD(&dev->pending_list);
         spin_lock_init(&dev->iotlb_lock);
-
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        vhost_init_maps(dev);
+#endif
 
         for (i = 0; i < dev->nvqs; ++i) {
                 vq = dev->vqs[i];
@@ -486,6 +646,7 @@ void vhost_dev_init(struct vhost_dev *dev,
                 vq->heads = NULL;
                 vq->dev = dev;
                 mutex_init(&vq->mutex);
+                spin_lock_init(&vq->mmu_lock);
                 vhost_vq_reset(dev, vq);
                 if (vq->handle_kick)
                         vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -565,7 +726,18 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
         if (err)
                 goto err_cgroup;
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
+        if (err)
+                goto err_mmu_notifier;
+#endif
+
         return 0;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+err_mmu_notifier:
+        vhost_dev_free_iovecs(dev);
+#endif
 err_cgroup:
         kthread_stop(worker);
         dev->worker = NULL;
@@ -656,6 +828,107 @@ static void vhost_clear_msg(struct vhost_dev *dev)
         spin_unlock(&dev->iotlb_lock);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
+                              int index, unsigned long uaddr,
+                              size_t size, bool write)
+{
+        struct vhost_uaddr *addr = &vq->uaddrs[index];
+
+        addr->uaddr = uaddr;
+        addr->size = size;
+        addr->write = write;
+}
+
+static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
+{
+        vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
+                          (unsigned long)vq->desc,
+                          vhost_get_desc_size(vq, vq->num),
+                          false);
+        vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
+                          (unsigned long)vq->avail,
+                          vhost_get_avail_size(vq, vq->num),
+                          false);
+        vhost_setup_uaddr(vq, VHOST_ADDR_USED,
+                          (unsigned long)vq->used,
+                          vhost_get_used_size(vq, vq->num),
+                          true);
+}
+
+static int vhost_map_prefetch(struct vhost_virtqueue *vq,
+                              int index)
+{
+        struct vhost_map *map;
+        struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+        struct page **pages;
+        int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
+        int npinned;
+        void *vaddr, *v;
+        int err;
+        int i;
+
+        spin_lock(&vq->mmu_lock);
+
+        err = -EFAULT;
+        if (vq->invalidate_count)
+                goto err;
+
+        err = -ENOMEM;
+        map = kmalloc(sizeof(*map), GFP_ATOMIC);
+        if (!map)
+                goto err;
+
+        pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
+        if (!pages)
+                goto err_pages;
+
+        err = -EFAULT;
+        npinned = __get_user_pages_fast(uaddr->uaddr, npages,
+                                        uaddr->write, pages);
+        if (npinned > 0)
+                release_pages(pages, npinned);
+        if (npinned != npages)
+                goto err_gup;
+
+        for (i = 0; i < npinned; i++)
+                if (PageHighMem(pages[i]))
+                        goto err_gup;
+
+        vaddr = v = page_address(pages[0]);
+
+        /* For simplicity, fall back to the userspace address if the VA is
+         * not contiguous.
+         */
+        for (i = 1; i < npinned; i++) {
+                v += PAGE_SIZE;
+                if (v != page_address(pages[i]))
+                        goto err_gup;
+        }
+
+        map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
+        map->npages = npages;
+        map->pages = pages;
+
+        rcu_assign_pointer(vq->maps[index], map);
+        /* No need for a synchronize_rcu(). This function should be
+         * called by dev->worker so we are serialized with all
+         * readers.
+         */
+        spin_unlock(&vq->mmu_lock);
+
+        return 0;
+
+err_gup:
+        kfree(pages);
+err_pages:
+        kfree(map);
+err:
+        spin_unlock(&vq->mmu_lock);
+        return err;
+}
+#endif
+
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
         int i;
@@ -685,8 +958,16 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
                 kthread_stop(dev->worker);
                 dev->worker = NULL;
         }
-        if (dev->mm)
+        if (dev->mm) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+                mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
+#endif
                 mmput(dev->mm);
+        }
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        for (i = 0; i < dev->nvqs; i++)
+                vhost_uninit_vq_maps(dev->vqs[i]);
+#endif
         dev->mm = NULL;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -915,6 +1196,26 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 
 static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_used *used;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+                if (likely(map)) {
+                        used = map->addr;
+                        *((__virtio16 *)&used->ring[vq->num]) =
+                                cpu_to_vhost16(vq, vq->avail_idx);
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
                               vhost_avail_event(vq));
 }
@@ -923,6 +1224,27 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
                                  struct vring_used_elem *head, int idx,
                                  int count)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_used *used;
+        size_t size;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+                if (likely(map)) {
+                        used = map->addr;
+                        size = count * sizeof(*head);
+                        memcpy(used->ring + idx, head, size);
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_copy_to_user(vq, vq->used->ring + idx, head,
                                   count * sizeof(*head));
 }
@@ -930,6 +1252,25 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_used *used;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+                if (likely(map)) {
+                        used = map->addr;
+                        used->flags = cpu_to_vhost16(vq, vq->used_flags);
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
                               &vq->used->flags);
 }
@@ -937,6 +1278,25 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_used *used;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+                if (likely(map)) {
+                        used = map->addr;
+                        used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
                               &vq->used->idx);
 }
@@ -982,12 +1342,50 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
 static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
                                       __virtio16 *idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_avail *avail;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+                if (likely(map)) {
+                        avail = map->addr;
+                        *idx = avail->idx;
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_get_avail(vq, *idx, &vq->avail->idx);
 }
 
 static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
                                        __virtio16 *head, int idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_avail *avail;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+                if (likely(map)) {
+                        avail = map->addr;
+                        *head = avail->ring[idx & (vq->num - 1)];
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_get_avail(vq, *head,
                                &vq->avail->ring[idx & (vq->num - 1)]);
 }
@@ -995,24 +1393,98 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
                                         __virtio16 *flags)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_avail *avail;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+                if (likely(map)) {
+                        avail = map->addr;
+                        *flags = avail->flags;
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_get_avail(vq, *flags, &vq->avail->flags);
 }
 
 static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
                                        __virtio16 *event)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_avail *avail;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+                if (likely(map)) {
+                        avail = map->addr;
+                        *event = (__virtio16)avail->ring[vq->num];
+                        rcu_read_unlock();
+                        return 0;
+                }
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_get_avail(vq, *event, vhost_used_event(vq));
 }
 
 static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
                                      __virtio16 *idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_used *used;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+                if (likely(map)) {
+                        used = map->addr;
+                        *idx = used->idx;
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_get_used(vq, *idx, &vq->used->idx);
 }
 
 static inline int vhost_get_desc(struct vhost_virtqueue *vq,
                                  struct vring_desc *desc, int idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        struct vhost_map *map;
+        struct vring_desc *d;
+
+        if (!vq->iotlb) {
+                rcu_read_lock();
+
+                map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
+                if (likely(map)) {
+                        d = map->addr;
+                        *desc = *(d + idx);
+                        rcu_read_unlock();
+                        return 0;
+                }
+
+                rcu_read_unlock();
+        }
+#endif
+
         return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
 }
 
@@ -1353,12 +1825,32 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
         return true;
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
+{
+        struct vhost_map __rcu *map;
+        int i;
+
+        for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+                rcu_read_lock();
+                map = rcu_dereference(vq->maps[i]);
+                rcu_read_unlock();
+                if (unlikely(!map))
+                        vhost_map_prefetch(vq, i);
+        }
+}
+#endif
+
 int vq_meta_prefetch(struct vhost_virtqueue *vq)
 {
         unsigned int num = vq->num;
 
-        if (!vq->iotlb)
+        if (!vq->iotlb) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+                vhost_vq_map_prefetch(vq);
+#endif
                 return 1;
+        }
 
         return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
                                vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
@@ -1569,6 +2061,16 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 
         mutex_lock(&vq->mutex);
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        /* Unregister the MMU notifier so that the invalidation
+         * callback can access vq->uaddrs[] without holding a lock.
+         */
+        if (d->mm)
+                mmu_notifier_unregister(&d->mmu_notifier, d->mm);
+
+        vhost_uninit_vq_maps(vq);
+#endif
+
         switch (ioctl) {
         case VHOST_SET_VRING_NUM:
                 r = vhost_vring_set_num(d, vq, argp);
@@ -1580,6 +2082,13 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
                 BUG();
         }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+        vhost_setup_vq_uaddr(vq);
+
+        if (d->mm)
+                mmu_notifier_register(&d->mmu_notifier, d->mm);
+#endif
+
         mutex_unlock(&vq->mutex);
 
         return r;