 drivers/vhost/vhost.c | 515 ++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/vhost/vhost.h |  38 +++++-
 2 files changed, 550 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e27d1da5f979..dc9301d31f12 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -299,6 +299,160 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 		__vhost_vq_meta_reset(d->vqs[i]);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_map_unprefetch(struct vhost_map *map)
+{
+	/* The map has been unpublished and a grace period has
+	 * elapsed, so no reader can still hold it. */
+	kfree(map->pages);
+	kfree(map);
+}
+
+static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
+{
+	struct vhost_map *map[VHOST_NUM_ADDRS];
+	int i;
+
+	spin_lock(&vq->mmu_lock);
+	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+		map[i] = rcu_dereference_protected(vq->maps[i],
+				  lockdep_is_held(&vq->mmu_lock));
+		if (map[i])
+			rcu_assign_pointer(vq->maps[i], NULL);
+	}
+	spin_unlock(&vq->mmu_lock);
+
+	synchronize_rcu();
+
+	for (i = 0; i < VHOST_NUM_ADDRS; i++)
+		if (map[i])
+			vhost_map_unprefetch(map[i]);
+
+}
+
+static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
+{
+	int i;
+
+	vhost_uninit_vq_maps(vq);
+	for (i = 0; i < VHOST_NUM_ADDRS; i++)
+		vq->uaddrs[i].size = 0;
+}
+
+static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
+				    unsigned long start,
+				    unsigned long end)
+{
+	if (unlikely(!uaddr->size))
+		return false;
+
+	return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
+}
+
+static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
+				      int index,
+				      unsigned long start,
+				      unsigned long end)
+{
+	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+	struct vhost_map *map;
+	int i;
+
+	if (!vhost_map_range_overlap(uaddr, start, end))
+		return;
+
+	spin_lock(&vq->mmu_lock);
+	++vq->invalidate_count;
+
+	map = rcu_dereference_protected(vq->maps[index],
+					lockdep_is_held(&vq->mmu_lock));
+	if (map) {
+		if (uaddr->write) {
+			for (i = 0; i < map->npages; i++)
+				set_page_dirty(map->pages[i]);
+		}
+		rcu_assign_pointer(vq->maps[index], NULL);
+	}
+	spin_unlock(&vq->mmu_lock);
+
+	if (map) {
+		synchronize_rcu();
+		vhost_map_unprefetch(map);
+	}
+}
+
+static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
+				    int index,
+				    unsigned long start,
+				    unsigned long end)
+{
+	if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
+		return;
+
+	spin_lock(&vq->mmu_lock);
+	--vq->invalidate_count;
+	spin_unlock(&vq->mmu_lock);
+}
+
+static int vhost_invalidate_range_start(struct mmu_notifier *mn,
+					const struct mmu_notifier_range *range)
+{
+	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+					     mmu_notifier);
+	int i, j;
+
+	if (!mmu_notifier_range_blockable(range))
+		return -EAGAIN;
+
+	for (i = 0; i < dev->nvqs; i++) {
+		struct vhost_virtqueue *vq = dev->vqs[i];
+
+		for (j = 0; j < VHOST_NUM_ADDRS; j++)
+			vhost_invalidate_vq_start(vq, j,
+						  range->start,
+						  range->end);
+	}
+
+	return 0;
+}
+
+static void vhost_invalidate_range_end(struct mmu_notifier *mn,
+				       const struct mmu_notifier_range *range)
+{
+	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+					     mmu_notifier);
+	int i, j;
+
+	for (i = 0; i < dev->nvqs; i++) {
+		struct vhost_virtqueue *vq = dev->vqs[i];
+
+		for (j = 0; j < VHOST_NUM_ADDRS; j++)
+			vhost_invalidate_vq_end(vq, j,
+						range->start,
+						range->end);
+	}
+}
+
+static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
+	.invalidate_range_start = vhost_invalidate_range_start,
+	.invalidate_range_end = vhost_invalidate_range_end,
+};
+
+static void vhost_init_maps(struct vhost_dev *dev)
+{
+	struct vhost_virtqueue *vq;
+	int i, j;
+
+	dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
+
+	for (i = 0; i < dev->nvqs; ++i) {
+		vq = dev->vqs[i];
+		for (j = 0; j < VHOST_NUM_ADDRS; j++)
+			RCU_INIT_POINTER(vq->maps[j], NULL);
+	}
+}
+#endif
+
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
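A note on vhost_map_range_overlap() above: both ranges are treated as closed intervals, and the upper bound is written as uaddr->uaddr - 1 + uaddr->size rather than uaddr->uaddr + uaddr->size so that a region ending at the very top of the address space cannot wrap around. A minimal standalone sketch (not part of the patch) that exercises the same predicate:

#include <assert.h>
#include <limits.h>
#include <stdbool.h>

static bool overlap(unsigned long a, unsigned long size,
		    unsigned long start, unsigned long end)
{
	if (!size)
		return false;
	return !(end < a || start > a - 1 + size);
}

int main(void)
{
	/* region [0x2000, 0x2fff] */
	assert(!overlap(0x2000, 0x1000, 0x0000, 0x1fff)); /* ends just below   */
	assert( overlap(0x2000, 0x1000, 0x1000, 0x2000)); /* touches 1st byte  */
	assert( overlap(0x2000, 0x1000, 0x2fff, 0x4000)); /* touches last byte */
	assert(!overlap(0x2000, 0x1000, 0x3000, 0x4000)); /* starts just past  */
	/* region ending at ULONG_MAX: "a + size" would wrap to 0 */
	assert( overlap(ULONG_MAX - 0xfff, 0x1000, ULONG_MAX, ULONG_MAX));
	return 0;
}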
@@ -327,7 +481,11 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
+	vq->invalidate_count = 0;
 	__vhost_vq_meta_reset(vq);
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_reset_vq_maps(vq);
+#endif
 }
 
 static int vhost_worker(void *data)
@@ -477,7 +635,9 @@ void vhost_dev_init(struct vhost_dev *dev,
 	INIT_LIST_HEAD(&dev->read_list);
 	INIT_LIST_HEAD(&dev->pending_list);
 	spin_lock_init(&dev->iotlb_lock);
-
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_init_maps(dev);
+#endif
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
@@ -486,6 +646,7 @@ void vhost_dev_init(struct vhost_dev *dev,
 		vq->heads = NULL;
 		vq->dev = dev;
 		mutex_init(&vq->mutex);
+		spin_lock_init(&vq->mmu_lock);
 		vhost_vq_reset(dev, vq);
 		if (vq->handle_kick)
 			vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -565,7 +726,18 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 	if (err)
 		goto err_cgroup;
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
+	if (err)
+		goto err_mmu_notifier;
+#endif
+
 	return 0;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+err_mmu_notifier:
+	vhost_dev_free_iovecs(dev);
+#endif
 err_cgroup:
 	kthread_stop(worker);
 	dev->worker = NULL;
@@ -656,6 +828,107 @@ static void vhost_clear_msg(struct vhost_dev *dev)
 	spin_unlock(&dev->iotlb_lock);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
+			      int index, unsigned long uaddr,
+			      size_t size, bool write)
+{
+	struct vhost_uaddr *addr = &vq->uaddrs[index];
+
+	addr->uaddr = uaddr;
+	addr->size = size;
+	addr->write = write;
+}
+
+static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
+{
+	vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
+			  (unsigned long)vq->desc,
+			  vhost_get_desc_size(vq, vq->num),
+			  false);
+	vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
+			  (unsigned long)vq->avail,
+			  vhost_get_avail_size(vq, vq->num),
+			  false);
+	vhost_setup_uaddr(vq, VHOST_ADDR_USED,
+			  (unsigned long)vq->used,
+			  vhost_get_used_size(vq, vq->num),
+			  true);
+}
+
+static int vhost_map_prefetch(struct vhost_virtqueue *vq,
+			      int index)
+{
+	struct vhost_map *map;
+	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+	struct page **pages;
+	int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
+	int npinned;
+	void *vaddr, *v;
+	int err;
+	int i;
+
+	spin_lock(&vq->mmu_lock);
+
+	err = -EFAULT;
+	if (vq->invalidate_count)
+		goto err;
+
+	err = -ENOMEM;
+	map = kmalloc(sizeof(*map), GFP_ATOMIC);
+	if (!map)
+		goto err;
+
+	pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
+	if (!pages)
+		goto err_pages;
+
+	err = -EFAULT;
+	npinned = __get_user_pages_fast(uaddr->uaddr, npages,
+					uaddr->write, pages);
+	if (npinned > 0)
+		release_pages(pages, npinned);
+	if (npinned != npages)
+		goto err_gup;
+
+	for (i = 0; i < npinned; i++)
+		if (PageHighMem(pages[i]))
+			goto err_gup;
+
+	vaddr = v = page_address(pages[0]);
+
+	/* For simplicity, fall back to the userspace address if the
+	 * kernel VA is not contiguous.
+	 */
+	for (i = 1; i < npinned; i++) {
+		v += PAGE_SIZE;
+		if (v != page_address(pages[i]))
+			goto err_gup;
+	}
+
+	map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
+	map->npages = npages;
+	map->pages = pages;
+
+	rcu_assign_pointer(vq->maps[index], map);
+	/* No need for a synchronize_rcu(). This function should be
+	 * called by dev->worker so we are serialized with all
+	 * readers.
+	 */
+	spin_unlock(&vq->mmu_lock);
+
+	return 0;
+
+err_gup:
+	kfree(pages);
+err_pages:
+	kfree(map);
+err:
+	spin_unlock(&vq->mmu_lock);
+	return err;
+}
+#endif
+
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
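Two details of vhost_map_prefetch() above are worth spelling out. First, the pinned pages are released right away via release_pages(); the mapping stays safe to use only because the MMU notifier tears it down before the underlying pages can go away. Second, the published address preserves the user address's offset within its first page. A standalone sketch (not part of the patch; the addresses are made up) of that offset arithmetic:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	uint64_t uaddr  = 0x7f12abcd3456ULL;     /* hypothetical user address */
	uint64_t vaddr  = 0xffff888004000000ULL; /* hypothetical kernel alias of its first page */
	uint64_t mapped = vaddr + (uaddr & (PAGE_SIZE - 1));

	/* the offset within the page carries over unchanged */
	assert((mapped & (PAGE_SIZE - 1)) == (uaddr & (PAGE_SIZE - 1)));
	assert(mapped == 0xffff888004000456ULL);
	return 0;
}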
@@ -685,8 +958,16 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 		kthread_stop(dev->worker);
 		dev->worker = NULL;
 	}
-	if (dev->mm)
+	if (dev->mm) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+		mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
+#endif
 		mmput(dev->mm);
+	}
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	for (i = 0; i < dev->nvqs; i++)
+		vhost_uninit_vq_maps(dev->vqs[i]);
+#endif
 	dev->mm = NULL;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -915,6 +1196,26 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 
 static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			*((__virtio16 *)&used->ring[vq->num]) =
+				cpu_to_vhost16(vq, vq->avail_idx);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
 			      vhost_avail_event(vq));
 }
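Every accessor from here on follows the same shape: when no IOTLB is in use, try the direct kernel mapping under rcu_read_lock(), and fall back to the uaccess helpers once the map has been invalidated. A standalone sketch (not part of the patch) of that fast-path/slow-path split, with a C11 acquire load standing in for rcu_dereference(); the grace-period half of RCU, which keeps a torn-down map alive while readers may still hold it, is deliberately elided:

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

struct map { unsigned short *addr; };

static unsigned short ring[4];
static struct map the_map = { ring };
static _Atomic(struct map *) cur_map = &the_map;

/* stands in for the vhost_put_user() uaccess copy */
static int slow_path_write(size_t idx, unsigned short val)
{
	ring[idx] = val;
	return 0;
}

static int put_event(size_t idx, unsigned short val)
{
	struct map *map = atomic_load_explicit(&cur_map, memory_order_acquire);

	if (map) {			/* fast path: plain store */
		map->addr[idx] = val;
		return 0;
	}
	return slow_path_write(idx, val);	/* invalidated: fall back */
}

int main(void)
{
	put_event(0, 1);		/* hits the fast path */
	atomic_store(&cur_map, NULL);	/* "invalidation" unpublishes the map */
	put_event(1, 2);		/* falls back to the slow path */
	assert(ring[0] == 1 && ring[1] == 2);
	return 0;
}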
@@ -923,6 +1224,27 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 				 struct vring_used_elem *head, int idx,
 				 int count)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+	size_t size;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			size = count * sizeof(*head);
+			memcpy(used->ring + idx, head, size);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_copy_to_user(vq, vq->used->ring + idx, head,
 				  count * sizeof(*head));
 }
@@ -930,6 +1252,25 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			used->flags = cpu_to_vhost16(vq, vq->used_flags);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
 			      &vq->used->flags);
 }
@@ -937,6 +1278,25 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
 			      &vq->used->idx);
 }
@@ -982,12 +1342,50 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
 static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
 				      __virtio16 *idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*idx = avail->idx;
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *idx, &vq->avail->idx);
 }
 
 static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 				       __virtio16 *head, int idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*head = avail->ring[idx & (vq->num - 1)];
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *head,
 			       &vq->avail->ring[idx & (vq->num - 1)]);
 }
@@ -995,24 +1393,98 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
 					__virtio16 *flags)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*flags = avail->flags;
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *flags, &vq->avail->flags);
 }
 
 static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
 				       __virtio16 *event)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_avail *avail;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+		if (likely(map)) {
+			avail = map->addr;
+			*event = (__virtio16)avail->ring[vq->num];
+			rcu_read_unlock();
+			return 0;
+		}
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_avail(vq, *event, vhost_used_event(vq));
 }
 
 static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
 				     __virtio16 *idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_used *used;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+		if (likely(map)) {
+			used = map->addr;
+			*idx = used->idx;
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_get_used(vq, *idx, &vq->used->idx);
 }
 
 static inline int vhost_get_desc(struct vhost_virtqueue *vq,
 				 struct vring_desc *desc, int idx)
 {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	struct vhost_map *map;
+	struct vring_desc *d;
+
+	if (!vq->iotlb) {
+		rcu_read_lock();
+
+		map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
+		if (likely(map)) {
+			d = map->addr;
+			*desc = *(d + idx);
+			rcu_read_unlock();
+			return 0;
+		}
+
+		rcu_read_unlock();
+	}
+#endif
+
 	return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
 }
 
@@ -1353,12 +1825,32 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
 	return true;
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
+{
+	struct vhost_map *map;
+	int i;
+
+	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+		rcu_read_lock();
+		map = rcu_dereference(vq->maps[i]);
+		rcu_read_unlock();
+		if (unlikely(!map))
+			vhost_map_prefetch(vq, i);
+	}
+}
+#endif
+
 int vq_meta_prefetch(struct vhost_virtqueue *vq)
 {
 	unsigned int num = vq->num;
 
-	if (!vq->iotlb)
+	if (!vq->iotlb) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+		vhost_vq_map_prefetch(vq);
+#endif
 		return 1;
+	}
 
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
 			       vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
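vhost_vq_map_prefetch() above reads the map pointers without taking mmu_lock, so its NULL check can race with a concurrent invalidation; that is fine because vhost_map_prefetch() re-checks invalidate_count under the lock before publishing anything. A standalone sketch (not part of the patch) of that check-then-revalidate shape, with a pthread mutex standing in for mmu_lock:

#include <assert.h>
#include <pthread.h>

struct state {
	pthread_mutex_t lock;
	int invalidate_count;	/* nonzero while an invalidation is in flight */
	void *map;		/* published mapping, NULL once torn down */
};

static int prefetch(struct state *s, void *new_map)
{
	int err = 0;

	pthread_mutex_lock(&s->lock);
	if (s->invalidate_count)	/* revalidate under the lock */
		err = -1;
	else
		s->map = new_map;
	pthread_mutex_unlock(&s->lock);
	return err;
}

static void maybe_prefetch(struct state *s, void *new_map)
{
	if (!s->map)			/* unlocked hint, may race */
		prefetch(s, new_map);	/* safe: re-checked under the lock */
}

int main(void)
{
	struct state s = { .lock = PTHREAD_MUTEX_INITIALIZER,
			   .invalidate_count = 1 };
	int page;

	maybe_prefetch(&s, &page);	/* refused: invalidation in flight */
	assert(s.map == NULL);
	s.invalidate_count = 0;
	maybe_prefetch(&s, &page);	/* now succeeds */
	assert(s.map == &page);
	return 0;
}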
@@ -1569,6 +2061,16 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 
 	mutex_lock(&vq->mutex);
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	/* Unregister the MMU notifier so that the invalidation
+	 * callback can access vq->uaddrs[] without holding a lock.
+	 */
+	if (d->mm)
+		mmu_notifier_unregister(&d->mmu_notifier, d->mm);
+
+	vhost_uninit_vq_maps(vq);
+#endif
+
 	switch (ioctl) {
 	case VHOST_SET_VRING_NUM:
 		r = vhost_vring_set_num(d, vq, argp);
@@ -1580,6 +2082,13 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 		BUG();
 	}
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_setup_vq_uaddr(vq);
+
+	if (d->mm)
+		mmu_notifier_register(&d->mmu_notifier, d->mm);
+#endif
+
 	mutex_unlock(&vq->mutex);
 
 	return r;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index e9ed2722b633..c5d950cf7627 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -12,6 +12,9 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/atomic.h>
+#include <linux/pagemap.h>
+#include <linux/mmu_notifier.h>
+#include <asm/cacheflush.h>
 
 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -80,6 +83,21 @@ enum vhost_uaddr_type {
 	VHOST_NUM_ADDRS = 3,
 };
 
+struct vhost_map {
+	int npages;
+	void *addr;
+	struct page **pages;
+};
+
+struct vhost_uaddr {
+	unsigned long uaddr;
+	size_t size;
+	bool write;
+};
+
+#define VHOST_ARCH_CAN_ACCEL_UACCESS (IS_ENABLED(CONFIG_MMU_NOTIFIER) && \
+				      ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0)
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
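For a sense of scale, a worked example (not part of the patch) of the sizes that end up in vhost_uaddr.size, assuming the split-ring layout from the virtio spec, a hypothetical 256-entry ring and VIRTIO_RING_F_EVENT_IDX negotiated; vhost_map_prefetch() derives its page count from the size alone with DIV_ROUND_UP:

#include <assert.h>

#define PAGE_SIZE	4096
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int num = 256, event = 2;		/* 2 extra bytes with EVENT_IDX */
	int desc  = 16 * num;			/* 16-byte descriptors          */
	int avail = 4 + 2 * num + event;	/* flags, idx, ring[num], event */
	int used  = 4 + 8 * num + event;	/* flags, idx, ring[num], event */

	assert(desc == 4096 && avail == 518 && used == 2054);
	assert(DIV_ROUND_UP(desc, PAGE_SIZE) == 1);
	assert(DIV_ROUND_UP(avail, PAGE_SIZE) == 1);
	assert(DIV_ROUND_UP(used, PAGE_SIZE) == 1);
	return 0;
}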
@@ -90,7 +108,22 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	/* Read by memory accessors, modified by metadata
+	 * prefetching, the MMU notifier and vring ioctl().
+	 * Synchronized through mmu_lock (writers) and RCU (writers
+	 * and readers).
+	 */
+	struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
+	/* Read by the MMU notifier, modified by vring ioctl(),
+	 * synchronized through MMU notifier
+	 * registration/unregistration.
+	 */
+	struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
+#endif
 	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
+
 	struct file *kick;
 	struct eventfd_ctx *call_ctx;
 	struct eventfd_ctx *error_ctx;
@@ -145,6 +178,8 @@ struct vhost_virtqueue {
 	bool user_be;
 #endif
 	u32 busyloop_timeout;
+	spinlock_t mmu_lock;
+	int invalidate_count;
 };
 
 struct vhost_msg_node {
@@ -158,6 +193,9 @@ struct vhost_msg_node {
 
 struct vhost_dev {
 	struct mm_struct *mm;
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier mmu_notifier;
+#endif
 	struct mutex mutex;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
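The notifier callbacks in vhost.c receive only a pointer to the struct mmu_notifier embedded here and recover the enclosing struct vhost_dev with container_of(). A standalone sketch (not part of the patch) of that pattern, with a local container_of in place of the kernel's:

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct notifier { void (*cb)(struct notifier *n); };

struct dev {
	int id;
	struct notifier mn;	/* embedded, like vhost_dev.mmu_notifier */
};

static void cb(struct notifier *n)
{
	/* the callback only gets &dev->mn, yet recovers the device */
	struct dev *d = container_of(n, struct dev, mn);

	assert(d->id == 42);
}

int main(void)
{
	struct dev d = { .id = 42, .mn = { .cb = cb } };

	d.mn.cb(&d.mn);
	return 0;
}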