 drivers/vfio/vfio_iommu_type1.c | 54 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4a9d666f1e91..e6e7f155bdd9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -66,6 +66,7 @@ struct vfio_domain {
 	struct list_head	next;
 	struct list_head	group_list;
 	int			prot;		/* IOMMU_CACHE */
+	bool			fgsp;		/* Fine-grained super pages */
 };
 
 struct vfio_dma {
@@ -350,8 +351,8 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 		iommu_unmap(d->domain, dma->iova, dma->size);
 
 	while (iova < end) {
-		size_t unmapped;
-		phys_addr_t phys;
+		size_t unmapped, len;
+		phys_addr_t phys, next;
 
 		phys = iommu_iova_to_phys(domain->domain, iova);
 		if (WARN_ON(!phys)) {
@@ -359,7 +360,19 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 			continue;
 		}
 
-		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+		/*
+		 * To optimize for fewer iommu_unmap() calls, each of which
+		 * may require hardware cache flushing, try to find the
+		 * largest contiguous physical memory chunk to unmap.
+		 */
+		for (len = PAGE_SIZE;
+		     !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+			next = iommu_iova_to_phys(domain->domain, iova + len);
+			if (next != phys + len)
+				break;
+		}
+
+		unmapped = iommu_unmap(domain->domain, iova, len);
 		if (WARN_ON(!unmapped))
 			break;
 
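For illustration, the coalescing scan added above can be exercised outside the kernel. The sketch below is a minimal user-space model, not part of the patch: lookup_phys() is a hypothetical stand-in for iommu_iova_to_phys(), wired so that eight IOVA pages are backed by two physically contiguous four-page chunks, and the scan collapses what would be eight PAGE_SIZE unmap calls into two larger ones.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/*
 * Hypothetical stand-in for iommu_iova_to_phys(): IOVA pages 0-3 map to
 * one physically contiguous chunk, pages 4-7 to another.
 */
static uint64_t lookup_phys(uint64_t iova)
{
	uint64_t page = iova / PAGE_SIZE;

	return (page < 4 ? 0x100000 : 0x900000) + (page % 4) * PAGE_SIZE;
}

int main(void)
{
	uint64_t iova = 0, end = 8 * PAGE_SIZE;

	while (iova < end) {
		uint64_t phys = lookup_phys(iova);
		size_t len;

		/*
		 * Same scan as the patch: grow len while the next IOVA page
		 * is physically adjacent to the chunk found so far.
		 */
		for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) {
			if (lookup_phys(iova + len) != phys + len)
				break;
		}

		printf("unmap iova 0x%llx len %zu (one call instead of %zu)\n",
		       (unsigned long long)iova, len, len / PAGE_SIZE);
		iova += len;
	}
	return 0;
}

In the actual patch, the !domain->fgsp guard skips this scan entirely when the IOMMU already coalesces contiguous mappings on its own, since a single PAGE_SIZE unmap then tears down the whole superpage anyway.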
@@ -665,6 +678,39 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	return 0;
 }
 
+/*
+ * We change our unmap behavior slightly depending on whether the IOMMU
+ * supports fine-grained superpages.  IOMMUs like AMD-Vi will use a superpage
+ * for practically any contiguous power-of-two mapping we give it.  This means
+ * we don't need to look for contiguous chunks ourselves to make unmapping
+ * more efficient.  On IOMMUs with coarse-grained super pages, like Intel VT-d
+ * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
+ * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+ * hugetlbfs is in use.
+ */
+static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+{
+	struct page *pages;
+	int ret, order = get_order(PAGE_SIZE * 2);
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!pages)
+		return;
+
+	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+			IOMMU_READ | IOMMU_WRITE | domain->prot);
+	if (!ret) {
+		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+
+		if (unmapped == PAGE_SIZE)
+			iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+		else
+			domain->fgsp = true;
+	}
+
+	__free_pages(pages, order);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
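To make the probe's decision rule concrete: vfio_test_domain_fgsp() maps two contiguous pages as a single 8K mapping, then asks the IOMMU to unmap only the first 4K. The following is a hypothetical mock of the two possible hardware behaviors, a sketch and not part of the patch:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/*
 * Hypothetical model of iommu_unmap() applied to the 2-page probe mapping:
 * a fine-grained-superpage IOMMU backs the 8K mapping with one PTE, so
 * unmapping 4K tears down the whole 8K; a 4K-granular IOMMU unmaps exactly
 * the 4K requested.
 */
static unsigned long mock_unmap(bool fine_grained, unsigned long len)
{
	return fine_grained ? 2 * PAGE_SIZE : len;
}

int main(void)
{
	for (int fg = 0; fg <= 1; fg++) {
		unsigned long unmapped = mock_unmap(fg, PAGE_SIZE);
		bool fgsp = (unmapped != PAGE_SIZE);

		printf("IOMMU %s: unmapped %lu -> fgsp = %s\n",
		       fg ? "AMD-Vi-like" : "VT-d-like",
		       unmapped, fgsp ? "true" : "false");
	}
	return 0;
}

An IOMMU that backed the probe mapping with a single superpage PTE must tear down the whole mapping and reports more than PAGE_SIZE unmapped, so fgsp is set and the per-page contiguity scan in vfio_unmap_unpin() is skipped; a 4K-granular IOMMU reports exactly PAGE_SIZE, and the probe then unmaps the leftover second page explicitly before freeing the test pages.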
@@ -758,6 +804,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		}
 	}
 
+	vfio_test_domain_fgsp(domain);
+
 	/* replay mappings on new domains */
 	ret = vfio_iommu_replay(iommu, domain);
 	if (ret)
