 drivers/vfio/vfio_iommu_type1.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4a9d666f1e91..e6e7f155bdd9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -66,6 +66,7 @@ struct vfio_domain {
 	struct list_head	next;
 	struct list_head	group_list;
 	int			prot;		/* IOMMU_CACHE */
+	bool			fgsp;		/* Fine-grained super pages */
 };
 
 struct vfio_dma {
@@ -350,8 +351,8 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 		iommu_unmap(d->domain, dma->iova, dma->size);
 
 	while (iova < end) {
-		size_t unmapped;
-		phys_addr_t phys;
+		size_t unmapped, len;
+		phys_addr_t phys, next;
 
 		phys = iommu_iova_to_phys(domain->domain, iova);
 		if (WARN_ON(!phys)) {
@@ -359,7 +360,19 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 			continue;
 		}
 
-		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+		/*
+		 * To optimize for fewer iommu_unmap() calls, each of which
+		 * may require hardware cache flushing, try to find the
+		 * largest contiguous physical memory chunk to unmap.
+		 */
+		for (len = PAGE_SIZE;
+		     !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+			next = iommu_iova_to_phys(domain->domain, iova + len);
+			if (next != phys + len)
+				break;
+		}
+
+		unmapped = iommu_unmap(domain->domain, iova, len);
 		if (WARN_ON(!unmapped))
 			break;
 
@@ -665,6 +678,39 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	return 0;
 }
 
+/*
+ * We change our unmap behavior slightly depending on whether the IOMMU
+ * supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage
+ * for practically any contiguous power-of-two mapping we give it. This means
+ * we don't need to look for contiguous chunks ourselves to make unmapping
+ * more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d
+ * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
+ * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+ * hugetlbfs is in use.
+ */
+static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+{
+	struct page *pages;
+	int ret, order = get_order(PAGE_SIZE * 2);
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!pages)
+		return;
+
+	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+			IOMMU_READ | IOMMU_WRITE | domain->prot);
+	if (!ret) {
+		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+
+		if (unmapped == PAGE_SIZE)
+			iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+		else
+			domain->fgsp = true;
+	}
+
+	__free_pages(pages, order);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
@@ -758,6 +804,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		}
 	}
 
+	vfio_test_domain_fgsp(domain);
+
 	/* replay mappings on new domains */
 	ret = vfio_iommu_replay(iommu, domain);
 	if (ret)