author		Michael S. Tsirkin <mst@redhat.com>	2019-08-10 13:53:21 -0400
committer	Michael S. Tsirkin <mst@redhat.com>	2019-09-04 07:39:48 -0400
commit		3d2c7d37047557175fb41de044091050b5f0d73b
tree		18f8965b2d8d63415346af400d4579353a8aaa70
parent		896fc242bc1d261c1178838487a0a54b260625cc
Revert "vhost: access vq metadata through kernel virtual address"
This reverts commit 7f466032dc ("vhost: access vq metadata through
kernel virtual address"). The commit caused a bunch of issues, and
while commit 73f628ec9e ("vhost: disable metadata prefetch
optimization") disabled the optimization it's not nice to keep lots of
dead code around.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
-rw-r--r--	drivers/vhost/vhost.c	515
-rw-r--r--	drivers/vhost/vhost.h	41
2 files changed, 3 insertions, 553 deletions
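The mechanism being removed is easier to follow with the pattern in front of you: each vring accessor in the diff below had gained a fast path that read or wrote guest metadata directly through a pinned, RCU-protected kernel mapping (vq->maps[]), while an MMU notifier tore that mapping down whenever the guest memory layout changed, forcing readers back onto the copy_{to,from}_user slow path. The following is a minimal userspace sketch of that reader/invalidator handshake, built on liburcu rather than kernel RCU; the names (struct map, put_used_idx_fast, invalidate) are illustrative stand-ins, not the kernel's.

```c
/* Userspace analogue of the reverted fast path; build with -lurcu.
 * "map" plays the role of vhost_map, invalidate() plays the role of
 * the MMU-notifier callback.  Illustrative only, not kernel code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

struct map {
	uint16_t *addr;			/* kernel-VA view of the used ring */
};

static struct map *vq_map;		/* analogue of vq->maps[VHOST_ADDR_USED] */

/* Fast path, mirroring the removed branch of vhost_put_used_idx():
 * if the RCU-protected mapping is still published, store directly;
 * otherwise report failure so the caller falls back to the
 * copy_to_user()-style slow path. */
static int put_used_idx_fast(uint16_t idx)
{
	struct map *m;
	int ret = -1;

	rcu_read_lock();
	m = rcu_dereference(vq_map);
	if (m) {
		m->addr[0] = idx;	/* direct store, no uaccess copy */
		ret = 0;
	}
	rcu_read_unlock();
	return ret;
}

/* Invalidation, mirroring vhost_invalidate_vq_start(): unpublish the
 * map, wait out in-flight readers, then free it. */
static void invalidate(void)
{
	struct map *m = vq_map;

	rcu_assign_pointer(vq_map, NULL);
	synchronize_rcu();
	free(m->addr);
	free(m);
}

int main(void)
{
	struct map *m = malloc(sizeof(*m));

	rcu_register_thread();
	m->addr = calloc(1, sizeof(uint16_t));
	rcu_assign_pointer(vq_map, m);

	printf("with map:  %d\n", put_used_idx_fast(42));  /* 0: fast path */
	invalidate();
	printf("after inv: %d\n", put_used_idx_fast(43));  /* -1: fall back */

	rcu_unregister_thread();
	return 0;
}
```

In the kernel version this bought a direct load/store in place of a uaccess copy on the hot path, at the cost of the pinning, dirty-tracking, and notifier-ordering bookkeeping visible in the diff; the commit message above is the verdict on that trade.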
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 1ac9de250319..5dc174ac8cac 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -297,160 +297,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 		__vhost_vq_meta_reset(d->vqs[i]);
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_map_unprefetch(struct vhost_map *map)
-{
-	kfree(map->pages);
-	map->pages = NULL;
-	map->npages = 0;
-	map->addr = NULL;
-}
-
-static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
-{
-	struct vhost_map *map[VHOST_NUM_ADDRS];
-	int i;
-
-	spin_lock(&vq->mmu_lock);
-	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
-		map[i] = rcu_dereference_protected(vq->maps[i],
-				  lockdep_is_held(&vq->mmu_lock));
-		if (map[i])
-			rcu_assign_pointer(vq->maps[i], NULL);
-	}
-	spin_unlock(&vq->mmu_lock);
-
-	synchronize_rcu();
-
-	for (i = 0; i < VHOST_NUM_ADDRS; i++)
-		if (map[i])
-			vhost_map_unprefetch(map[i]);
-
-}
-
-static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
-{
-	int i;
-
-	vhost_uninit_vq_maps(vq);
-	for (i = 0; i < VHOST_NUM_ADDRS; i++)
-		vq->uaddrs[i].size = 0;
-}
-
-static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
-				    unsigned long start,
-				    unsigned long end)
-{
-	if (unlikely(!uaddr->size))
-		return false;
-
-	return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
-}
-
-static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
-				      int index,
-				      unsigned long start,
-				      unsigned long end)
-{
-	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
-	struct vhost_map *map;
-	int i;
-
-	if (!vhost_map_range_overlap(uaddr, start, end))
-		return;
-
-	spin_lock(&vq->mmu_lock);
-	++vq->invalidate_count;
-
-	map = rcu_dereference_protected(vq->maps[index],
-					lockdep_is_held(&vq->mmu_lock));
-	if (map) {
-		if (uaddr->write) {
-			for (i = 0; i < map->npages; i++)
-				set_page_dirty(map->pages[i]);
-		}
-		rcu_assign_pointer(vq->maps[index], NULL);
-	}
-	spin_unlock(&vq->mmu_lock);
-
-	if (map) {
-		synchronize_rcu();
-		vhost_map_unprefetch(map);
-	}
-}
-
-static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
-				    int index,
-				    unsigned long start,
-				    unsigned long end)
-{
-	if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
-		return;
-
-	spin_lock(&vq->mmu_lock);
-	--vq->invalidate_count;
-	spin_unlock(&vq->mmu_lock);
-}
-
-static int vhost_invalidate_range_start(struct mmu_notifier *mn,
-					const struct mmu_notifier_range *range)
-{
-	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
-					     mmu_notifier);
-	int i, j;
-
-	if (!mmu_notifier_range_blockable(range))
-		return -EAGAIN;
-
-	for (i = 0; i < dev->nvqs; i++) {
-		struct vhost_virtqueue *vq = dev->vqs[i];
-
-		for (j = 0; j < VHOST_NUM_ADDRS; j++)
-			vhost_invalidate_vq_start(vq, j,
-						  range->start,
-						  range->end);
-	}
-
-	return 0;
-}
-
-static void vhost_invalidate_range_end(struct mmu_notifier *mn,
-				       const struct mmu_notifier_range *range)
-{
-	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
-					     mmu_notifier);
-	int i, j;
-
-	for (i = 0; i < dev->nvqs; i++) {
-		struct vhost_virtqueue *vq = dev->vqs[i];
-
-		for (j = 0; j < VHOST_NUM_ADDRS; j++)
-			vhost_invalidate_vq_end(vq, j,
-						range->start,
-						range->end);
-	}
-}
-
-static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
-	.invalidate_range_start = vhost_invalidate_range_start,
-	.invalidate_range_end = vhost_invalidate_range_end,
-};
-
-static void vhost_init_maps(struct vhost_dev *dev)
-{
-	struct vhost_virtqueue *vq;
-	int i, j;
-
-	dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
-
-	for (i = 0; i < dev->nvqs; ++i) {
-		vq = dev->vqs[i];
-		for (j = 0; j < VHOST_NUM_ADDRS; j++)
-			RCU_INIT_POINTER(vq->maps[j], NULL);
-	}
-}
-#endif
-
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
@@ -479,11 +325,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
-	vq->invalidate_count = 0;
 	__vhost_vq_meta_reset(vq);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	vhost_reset_vq_maps(vq);
-#endif
 }
 
 static int vhost_worker(void *data)
@@ -633,9 +475,7 @@ void vhost_dev_init(struct vhost_dev *dev,
 	INIT_LIST_HEAD(&dev->read_list);
 	INIT_LIST_HEAD(&dev->pending_list);
 	spin_lock_init(&dev->iotlb_lock);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	vhost_init_maps(dev);
-#endif
+
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
@@ -644,7 +484,6 @@ void vhost_dev_init(struct vhost_dev *dev,
 		vq->heads = NULL;
 		vq->dev = dev;
 		mutex_init(&vq->mutex);
-		spin_lock_init(&vq->mmu_lock);
 		vhost_vq_reset(dev, vq);
 		if (vq->handle_kick)
 			vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -724,18 +563,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 	if (err)
 		goto err_cgroup;
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
-	if (err)
-		goto err_mmu_notifier;
-#endif
-
 	return 0;
-
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-err_mmu_notifier:
-	vhost_dev_free_iovecs(dev);
-#endif
 err_cgroup:
 	kthread_stop(worker);
 	dev->worker = NULL;
@@ -826,107 +654,6 @@ static void vhost_clear_msg(struct vhost_dev *dev)
 	spin_unlock(&dev->iotlb_lock);
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
-			      int index, unsigned long uaddr,
-			      size_t size, bool write)
-{
-	struct vhost_uaddr *addr = &vq->uaddrs[index];
-
-	addr->uaddr = uaddr;
-	addr->size = size;
-	addr->write = write;
-}
-
-static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
-{
-	vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
-			  (unsigned long)vq->desc,
-			  vhost_get_desc_size(vq, vq->num),
-			  false);
-	vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
-			  (unsigned long)vq->avail,
-			  vhost_get_avail_size(vq, vq->num),
-			  false);
-	vhost_setup_uaddr(vq, VHOST_ADDR_USED,
-			  (unsigned long)vq->used,
-			  vhost_get_used_size(vq, vq->num),
-			  true);
-}
-
-static int vhost_map_prefetch(struct vhost_virtqueue *vq,
-			      int index)
-{
-	struct vhost_map *map;
-	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
-	struct page **pages;
-	int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
-	int npinned;
-	void *vaddr, *v;
-	int err;
-	int i;
-
-	spin_lock(&vq->mmu_lock);
-
-	err = -EFAULT;
-	if (vq->invalidate_count)
-		goto err;
-
-	err = -ENOMEM;
-	map = kmalloc(sizeof(*map), GFP_ATOMIC);
-	if (!map)
-		goto err;
-
-	pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
-	if (!pages)
-		goto err_pages;
-
-	err = EFAULT;
-	npinned = __get_user_pages_fast(uaddr->uaddr, npages,
-					uaddr->write, pages);
-	if (npinned > 0)
-		release_pages(pages, npinned);
-	if (npinned != npages)
-		goto err_gup;
-
-	for (i = 0; i < npinned; i++)
-		if (PageHighMem(pages[i]))
-			goto err_gup;
-
-	vaddr = v = page_address(pages[0]);
-
-	/* For simplicity, fallback to userspace address if VA is not
-	 * contigious.
-	 */
-	for (i = 1; i < npinned; i++) {
-		v += PAGE_SIZE;
-		if (v != page_address(pages[i]))
-			goto err_gup;
-	}
-
-	map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
-	map->npages = npages;
-	map->pages = pages;
-
-	rcu_assign_pointer(vq->maps[index], map);
-	/* No need for a synchronize_rcu(). This function should be
-	 * called by dev->worker so we are serialized with all
-	 * readers.
-	 */
-	spin_unlock(&vq->mmu_lock);
-
-	return 0;
-
-err_gup:
-	kfree(pages);
-err_pages:
-	kfree(map);
-err:
-	spin_unlock(&vq->mmu_lock);
-	return err;
-}
-#endif
-
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
@@ -956,16 +683,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 		kthread_stop(dev->worker);
 		dev->worker = NULL;
 	}
-	if (dev->mm) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-		mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
-#endif
+	if (dev->mm)
 		mmput(dev->mm);
-	}
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	for (i = 0; i < dev->nvqs; i++)
-		vhost_uninit_vq_maps(dev->vqs[i]);
-#endif
 	dev->mm = NULL;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -1194,26 +913,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 
 static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_used *used;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-		if (likely(map)) {
-			used = map->addr;
-			*((__virtio16 *)&used->ring[vq->num]) =
-				cpu_to_vhost16(vq, vq->avail_idx);
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
 			      vhost_avail_event(vq));
 }
@@ -1222,27 +921,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 				 struct vring_used_elem *head, int idx,
 				 int count)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_used *used;
-	size_t size;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-		if (likely(map)) {
-			used = map->addr;
-			size = count * sizeof(*head);
-			memcpy(used->ring + idx, head, size);
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_copy_to_user(vq, vq->used->ring + idx, head,
 				  count * sizeof(*head));
 }
@@ -1250,25 +928,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_used *used;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-		if (likely(map)) {
-			used = map->addr;
-			used->flags = cpu_to_vhost16(vq, vq->used_flags);
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
 			      &vq->used->flags);
 }
@@ -1276,25 +935,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_used *used;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-		if (likely(map)) {
-			used = map->addr;
-			used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
 			      &vq->used->idx);
 }
@@ -1340,50 +980,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
 static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
 				      __virtio16 *idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_avail *avail;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-		if (likely(map)) {
-			avail = map->addr;
-			*idx = avail->idx;
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_get_avail(vq, *idx, &vq->avail->idx);
 }
 
 static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 				       __virtio16 *head, int idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_avail *avail;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-		if (likely(map)) {
-			avail = map->addr;
-			*head = avail->ring[idx & (vq->num - 1)];
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_get_avail(vq, *head,
 			       &vq->avail->ring[idx & (vq->num - 1)]);
 }
@@ -1391,98 +993,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
 					__virtio16 *flags)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_avail *avail;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-		if (likely(map)) {
-			avail = map->addr;
-			*flags = avail->flags;
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_get_avail(vq, *flags, &vq->avail->flags);
 }
 
 static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
 				       __virtio16 *event)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_avail *avail;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-		if (likely(map)) {
-			avail = map->addr;
-			*event = (__virtio16)avail->ring[vq->num];
-			rcu_read_unlock();
-			return 0;
-		}
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_get_avail(vq, *event, vhost_used_event(vq));
 }
 
 static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
 				     __virtio16 *idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_used *used;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-		if (likely(map)) {
-			used = map->addr;
-			*idx = used->idx;
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_get_used(vq, *idx, &vq->used->idx);
 }
 
 static inline int vhost_get_desc(struct vhost_virtqueue *vq,
 				 struct vring_desc *desc, int idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	struct vhost_map *map;
-	struct vring_desc *d;
-
-	if (!vq->iotlb) {
-		rcu_read_lock();
-
-		map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
-		if (likely(map)) {
-			d = map->addr;
-			*desc = *(d + idx);
-			rcu_read_unlock();
-			return 0;
-		}
-
-		rcu_read_unlock();
-	}
-#endif
-
 	return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
 }
 
@@ -1823,32 +1351,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
 	return true;
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
-{
-	struct vhost_map __rcu *map;
-	int i;
-
-	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
-		rcu_read_lock();
-		map = rcu_dereference(vq->maps[i]);
-		rcu_read_unlock();
-		if (unlikely(!map))
-			vhost_map_prefetch(vq, i);
-	}
-}
-#endif
-
 int vq_meta_prefetch(struct vhost_virtqueue *vq)
 {
 	unsigned int num = vq->num;
 
-	if (!vq->iotlb) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-		vhost_vq_map_prefetch(vq);
-#endif
+	if (!vq->iotlb)
 		return 1;
-	}
 
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
 			       vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
@@ -2059,16 +1567,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 
 	mutex_lock(&vq->mutex);
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	/* Unregister MMU notifer to allow invalidation callback
-	 * can access vq->uaddrs[] without holding a lock.
-	 */
-	if (d->mm)
-		mmu_notifier_unregister(&d->mmu_notifier, d->mm);
-
-	vhost_uninit_vq_maps(vq);
-#endif
-
 	switch (ioctl) {
 	case VHOST_SET_VRING_NUM:
 		r = vhost_vring_set_num(d, vq, argp);
@@ -2080,13 +1578,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 		BUG();
 	}
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	vhost_setup_vq_uaddr(vq);
-
-	if (d->mm)
-		mmu_notifier_register(&d->mmu_notifier, d->mm);
-#endif
-
 	mutex_unlock(&vq->mutex);
 
 	return r;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 42a8c2a13ab1..e9ed2722b633 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -12,9 +12,6 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/atomic.h>
-#include <linux/pagemap.h>
-#include <linux/mmu_notifier.h>
-#include <asm/cacheflush.h>
 
 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -83,24 +80,6 @@ enum vhost_uaddr_type {
 	VHOST_NUM_ADDRS = 3,
 };
 
-struct vhost_map {
-	int npages;
-	void *addr;
-	struct page **pages;
-};
-
-struct vhost_uaddr {
-	unsigned long uaddr;
-	size_t size;
-	bool write;
-};
-
-#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
-#else
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
-#endif
-
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -111,22 +90,7 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
-
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-	/* Read by memory accessors, modified by meta data
-	 * prefetching, MMU notifier and vring ioctl().
-	 * Synchonrized through mmu_lock (writers) and RCU (writers
-	 * and readers).
-	 */
-	struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
-	/* Read by MMU notifier, modified by vring ioctl(),
-	 * synchronized through MMU notifier
-	 * registering/unregistering.
-	 */
-	struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
-#endif
 	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
-
 	struct file *kick;
 	struct eventfd_ctx *call_ctx;
 	struct eventfd_ctx *error_ctx;
@@ -181,8 +145,6 @@ struct vhost_virtqueue {
 	bool user_be;
 #endif
 	u32 busyloop_timeout;
-	spinlock_t mmu_lock;
-	int invalidate_count;
 };
 
 struct vhost_msg_node {
@@ -196,9 +158,6 @@ struct vhost_msg_node {
 
 struct vhost_dev {
 	struct mm_struct *mm;
-#ifdef CONFIG_MMU_NOTIFIER
-	struct mmu_notifier mmu_notifier;
-#endif
 	struct mutex mutex;
 	struct vhost_virtqueue **vqs;
 	int nvqs;