 drivers/vhost/vhost.c | 515 ++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/vhost/vhost.h |  38 ++++
 2 files changed, 550 insertions(+), 3 deletions(-)
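
For orientation before the diff: the patch lets the vhost metadata accessors bypass copy_to_user()/copy_from_user() when no IOTLB is in use. A prefetch step run from dev->worker pins the desc/avail/used rings and publishes a kernel-VA mapping under RCU; MMU notifier callbacks unpublish and free that mapping when the pages are invalidated, and the accessors fall back to the old uaccess path whenever no mapping is published. The C program below is a hedged userspace analogy of just that reader fast-path/fallback pattern, not code from the patch: struct mapping, put_used_idx() and invalidate_mapping() are invented names, and a pthread rwlock stands in for the patch's combination of RCU (readers) and vq->mmu_lock plus synchronize_rcu() (invalidation).

/* Userspace analogy of the vhost fast-path/fallback access pattern.
 * Build with: cc -pthread analogy.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct mapping {
	unsigned short idx;		/* stands in for vring_used->idx */
};

static struct mapping *map;		/* like vq->maps[VHOST_ADDR_USED] */
static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

static int put_used_idx(unsigned short val)
{
	pthread_rwlock_rdlock(&map_lock);
	if (map) {			/* fast path: plain store via the mapping */
		map->idx = val;
		pthread_rwlock_unlock(&map_lock);
		return 0;
	}
	pthread_rwlock_unlock(&map_lock);
	printf("slow path: put_user(%hu)\n", val);	/* fallback: uaccess path */
	return 0;
}

static void invalidate_mapping(void)
{
	struct mapping *old;

	pthread_rwlock_wrlock(&map_lock);	/* waits out in-flight readers */
	old = map;
	map = NULL;				/* unpublish the mapping */
	pthread_rwlock_unlock(&map_lock);
	free(old);		/* kernel: synchronize_rcu() before freeing */
}

int main(void)
{
	map = calloc(1, sizeof(*map));
	put_used_idx(1);		/* hits the direct-access fast path */
	invalidate_mapping();		/* e.g. an MMU notifier fired */
	put_used_idx(2);		/* falls back to the slow path */
	return 0;
}
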
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e27d1da5f979..dc9301d31f12 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -299,6 +299,160 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 		__vhost_vq_meta_reset(d->vqs[i]);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_map_unprefetch(struct vhost_map *map)
+{
+	kfree(map->pages);
+	map->pages = NULL;
+	map->npages = 0;
+	map->addr = NULL;
+}
+
+static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
+{
+	struct vhost_map *map[VHOST_NUM_ADDRS];
+	int i;
+
+	spin_lock(&vq->mmu_lock);
+	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+		map[i] = rcu_dereference_protected(vq->maps[i],
+				lockdep_is_held(&vq->mmu_lock));
+		if (map[i])
+			rcu_assign_pointer(vq->maps[i], NULL);
+	}
+	spin_unlock(&vq->mmu_lock);
+
+	synchronize_rcu();
+
+	for (i = 0; i < VHOST_NUM_ADDRS; i++)
+		if (map[i])
+			vhost_map_unprefetch(map[i]);
+
+}
+
+static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
+{
+	int i;
+
+	vhost_uninit_vq_maps(vq);
+	for (i = 0; i < VHOST_NUM_ADDRS; i++)
+		vq->uaddrs[i].size = 0;
+}
+
+static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
+				    unsigned long start,
+				    unsigned long end)
+{
+	if (unlikely(!uaddr->size))
+		return false;
+
+	return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
+}
+
+static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
+				      int index,
+				      unsigned long start,
+				      unsigned long end)
+{
+	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+	struct vhost_map *map;
+	int i;
+
+	if (!vhost_map_range_overlap(uaddr, start, end))
+		return;
+
+	spin_lock(&vq->mmu_lock);
+	++vq->invalidate_count;
+
+	map = rcu_dereference_protected(vq->maps[index],
+			lockdep_is_held(&vq->mmu_lock));
+	if (map) {
+		if (uaddr->write) {
+			for (i = 0; i < map->npages; i++)
+				set_page_dirty(map->pages[i]);
+		}
+		rcu_assign_pointer(vq->maps[index], NULL);
+	}
+	spin_unlock(&vq->mmu_lock);
+
+	if (map) {
+		synchronize_rcu();
+		vhost_map_unprefetch(map);
+	}
+}
+
+static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
+				    int index,
+				    unsigned long start,
+				    unsigned long end)
+{
+	if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
+		return;
+
+	spin_lock(&vq->mmu_lock);
+	--vq->invalidate_count;
+	spin_unlock(&vq->mmu_lock);
+}
+
+static int vhost_invalidate_range_start(struct mmu_notifier *mn,
+					const struct mmu_notifier_range *range)
+{
+	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+					     mmu_notifier);
+	int i, j;
+
+	if (!mmu_notifier_range_blockable(range))
+		return -EAGAIN;
+
+	for (i = 0; i < dev->nvqs; i++) {
+		struct vhost_virtqueue *vq = dev->vqs[i];
+
+		for (j = 0; j < VHOST_NUM_ADDRS; j++)
+			vhost_invalidate_vq_start(vq, j,
+						  range->start,
+						  range->end);
+	}
+
+	return 0;
+}
+
+static void vhost_invalidate_range_end(struct mmu_notifier *mn,
+				       const struct mmu_notifier_range *range)
+{
+	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+					     mmu_notifier);
+	int i, j;
+
+	for (i = 0; i < dev->nvqs; i++) {
+		struct vhost_virtqueue *vq = dev->vqs[i];
+
+		for (j = 0; j < VHOST_NUM_ADDRS; j++)
+			vhost_invalidate_vq_end(vq, j,
+						range->start,
+						range->end);
+	}
+}
+
+static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
+	.invalidate_range_start = vhost_invalidate_range_start,
+	.invalidate_range_end = vhost_invalidate_range_end,
+};
+
+static void vhost_init_maps(struct vhost_dev *dev)
+{
+	struct vhost_virtqueue *vq;
+	int i, j;
+
+	dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
+
+	for (i = 0; i < dev->nvqs; ++i) {
+		vq = dev->vqs[i];
+		for (j = 0; j < VHOST_NUM_ADDRS; j++)
+			RCU_INIT_POINTER(vq->maps[j], NULL);
+	}
+}
+#endif
+
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
@@ -327,7 +481,11 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
+	vq->invalidate_count = 0;
 	__vhost_vq_meta_reset(vq);
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_reset_vq_maps(vq);
+#endif
 }
 
 static int vhost_worker(void *data)
@@ -477,7 +635,9 @@ void vhost_dev_init(struct vhost_dev *dev,
 	INIT_LIST_HEAD(&dev->read_list);
 	INIT_LIST_HEAD(&dev->pending_list);
 	spin_lock_init(&dev->iotlb_lock);
-
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_init_maps(dev);
+#endif
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
@@ -486,6 +646,7 @@ void vhost_dev_init(struct vhost_dev *dev,
 		vq->heads = NULL;
 		vq->dev = dev;
 		mutex_init(&vq->mutex);
+		spin_lock_init(&vq->mmu_lock);
 		vhost_vq_reset(dev, vq);
 		if (vq->handle_kick)
 			vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -565,7 +726,18 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 	if (err)
 		goto err_cgroup;
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
+	if (err)
+		goto err_mmu_notifier;
+#endif
+
 	return 0;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+err_mmu_notifier:
+	vhost_dev_free_iovecs(dev);
+#endif
 err_cgroup:
 	kthread_stop(worker);
 	dev->worker = NULL;
@@ -656,6 +828,107 @@ static void vhost_clear_msg(struct vhost_dev *dev)
 	spin_unlock(&dev->iotlb_lock);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
+			      int index, unsigned long uaddr,
+			      size_t size, bool write)
+{
+	struct vhost_uaddr *addr = &vq->uaddrs[index];
+
+	addr->uaddr = uaddr;
+	addr->size = size;
+	addr->write = write;
+}
+
+static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
+{
+	vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
+			  (unsigned long)vq->desc,
+			  vhost_get_desc_size(vq, vq->num),
+			  false);
+	vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
+			  (unsigned long)vq->avail,
+			  vhost_get_avail_size(vq, vq->num),
+			  false);
+	vhost_setup_uaddr(vq, VHOST_ADDR_USED,
+			  (unsigned long)vq->used,
+			  vhost_get_used_size(vq, vq->num),
+			  true);
+}
+
+static int vhost_map_prefetch(struct vhost_virtqueue *vq,
+			      int index)
+{
+	struct vhost_map *map;
+	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+	struct page **pages;
+	int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
+	int npinned;
+	void *vaddr, *v;
+	int err;
+	int i;
+
+	spin_lock(&vq->mmu_lock);
+
+	err = -EFAULT;
+	if (vq->invalidate_count)
+		goto err;
+
+	err = -ENOMEM;
+	map = kmalloc(sizeof(*map), GFP_ATOMIC);
+	if (!map)
+		goto err;
+
+	pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
+	if (!pages)
+		goto err_pages;
+
+	err = -EFAULT;
+	npinned = __get_user_pages_fast(uaddr->uaddr, npages,
+					uaddr->write, pages);
+	if (npinned > 0)
+		release_pages(pages, npinned);
+	if (npinned != npages)
+		goto err_gup;
+
+	for (i = 0; i < npinned; i++)
+		if (PageHighMem(pages[i]))
+			goto err_gup;
+
+	vaddr = v = page_address(pages[0]);
+
+	/* For simplicity, fall back to the userspace address if the
+	 * mapped VA is not contiguous.
+	 */
+	for (i = 1; i < npinned; i++) {
+		v += PAGE_SIZE;
+		if (v != page_address(pages[i]))
+			goto err_gup;
+	}
+
+	map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
+	map->npages = npages;
+	map->pages = pages;
+
+	rcu_assign_pointer(vq->maps[index], map);
+	/* No need for a synchronize_rcu(): this function is called
+	 * from dev->worker, so it is serialized with all
+	 * readers.
+	 */
+	spin_unlock(&vq->mmu_lock);
+
+	return 0;
+
+err_gup:
+	kfree(pages);
+err_pages:
+	kfree(map);
+err:
+	spin_unlock(&vq->mmu_lock);
+	return err;
+}
+#endif
+
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
@@ -685,8 +958,16 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 		kthread_stop(dev->worker);
 		dev->worker = NULL;
 	}
-	if (dev->mm)
+	if (dev->mm) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+		mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
+#endif
 		mmput(dev->mm);
+	}
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	for (i = 0; i < dev->nvqs; i++)
+		vhost_uninit_vq_maps(dev->vqs[i]);
+#endif
 	dev->mm = NULL;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -915,6 +1196,26 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 
 static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			*((__virtio16 *)&used->ring[vq->num]) =
+				cpu_to_vhost16(vq, vq->avail_idx);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
 			      vhost_avail_event(vq));
 }
@@ -923,6 +1224,27 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 				  struct vring_used_elem *head, int idx,
 				  int count)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+	size_t size;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			size = count * sizeof(*head);
+			memcpy(used->ring + idx, head, size);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_copy_to_user(vq, vq->used->ring + idx, head,
 				  count * sizeof(*head));
 }
@@ -930,6 +1252,25 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			used->flags = cpu_to_vhost16(vq, vq->used_flags);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
 			      &vq->used->flags);
 }
@@ -937,6 +1278,25 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
 			      &vq->used->idx);
 }
@@ -982,12 +1342,50 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
 static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
 				      __virtio16 *idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*idx = avail->idx;
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *idx, &vq->avail->idx);
 }
 
 static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 				       __virtio16 *head, int idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*head = avail->ring[idx & (vq->num - 1)];
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *head,
 			       &vq->avail->ring[idx & (vq->num - 1)]);
 }
@@ -995,24 +1393,98 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
 					__virtio16 *flags)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*flags = avail->flags;
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *flags, &vq->avail->flags);
 }
 
 static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
 				       __virtio16 *event)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*event = (__virtio16)avail->ring[vq->num];
+			rcu_read_unlock();
+			return 0;
+		}
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *event, vhost_used_event(vq));
 }
 
 static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
 				     __virtio16 *idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			*idx = used->idx;
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_used(vq, *idx, &vq->used->idx);
 }
 
 static inline int vhost_get_desc(struct vhost_virtqueue *vq,
 				 struct vring_desc *desc, int idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_desc *d;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
+		if (likely(map)) {
+			d = map->addr;
+			*desc = *(d + idx);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
 }
 
@@ -1353,12 +1825,32 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
 	return true;
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
+{
+	struct vhost_map __rcu *map;
+	int i;
+
+	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+		rcu_read_lock();
+		map = rcu_dereference(vq->maps[i]);
+		rcu_read_unlock();
+		if (unlikely(!map))
+			vhost_map_prefetch(vq, i);
+	}
+}
+#endif
+
 int vq_meta_prefetch(struct vhost_virtqueue *vq)
 {
 	unsigned int num = vq->num;
 
-	if (!vq->iotlb)
+	if (!vq->iotlb) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+		vhost_vq_map_prefetch(vq);
+#endif
 		return 1;
+	}
 
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
 			       vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
@@ -1569,6 +2061,16 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 
 	mutex_lock(&vq->mutex);
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	/* Unregister the MMU notifier so that the invalidation
+	 * callback can access vq->uaddrs[] without holding a lock.
+	 */
+	if (d->mm)
+		mmu_notifier_unregister(&d->mmu_notifier, d->mm);
+
+	vhost_uninit_vq_maps(vq);
+#endif
+
 	switch (ioctl) {
 	case VHOST_SET_VRING_NUM:
 		r = vhost_vring_set_num(d, vq, argp);
@@ -1580,6 +2082,13 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 		BUG();
 	}
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_setup_vq_uaddr(vq);
+
+	if (d->mm)
+		mmu_notifier_register(&d->mmu_notifier, d->mm);
+#endif
+
 	mutex_unlock(&vq->mutex);
 
 	return r;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index e9ed2722b633..c5d950cf7627 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -12,6 +12,9 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/atomic.h>
+#include <linux/pagemap.h>
+#include <linux/mmu_notifier.h>
+#include <asm/cacheflush.h>
 
 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -80,6 +83,21 @@ enum vhost_uaddr_type {
 	VHOST_NUM_ADDRS = 3,
 };
 
+struct vhost_map {
+	int npages;
+	void *addr;
+	struct page **pages;
+};
+
+struct vhost_uaddr {
+	unsigned long uaddr;
+	size_t size;
+	bool write;
+};
+
+#define VHOST_ARCH_CAN_ACCEL_UACCESS defined(CONFIG_MMU_NOTIFIER) && \
+	ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -90,7 +108,22 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	/* Read by memory accessors, modified by metadata
+	 * prefetching, the MMU notifier and vring ioctl().
+	 * Synchronized through mmu_lock (writers) and RCU
+	 * (writers and readers).
+	 */
+	struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
+	/* Read by the MMU notifier, modified by vring ioctl(),
+	 * synchronized through MMU notifier
+	 * registering/unregistering.
+	 */
+	struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
+#endif
 	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
+
 	struct file *kick;
 	struct eventfd_ctx *call_ctx;
 	struct eventfd_ctx *error_ctx;
@@ -145,6 +178,8 @@ struct vhost_virtqueue {
 	bool user_be;
 #endif
 	u32 busyloop_timeout;
+	spinlock_t mmu_lock;
+	int invalidate_count;
 };
 
 struct vhost_msg_node {
@@ -158,6 +193,9 @@ struct vhost_msg_node {
 
 struct vhost_dev {
 	struct mm_struct *mm;
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier mmu_notifier;
+#endif
 	struct mutex mutex;
 	struct vhost_virtqueue **vqs;
 	int nvqs;