-rw-r--r--	drivers/vfio/vfio_iommu_type1.c	54
1 file changed, 51 insertions(+), 3 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4a9d666f1e91..e6e7f155bdd9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -66,6 +66,7 @@ struct vfio_domain {
 	struct list_head	next;
 	struct list_head	group_list;
 	int			prot;		/* IOMMU_CACHE */
+	bool			fgsp;		/* Fine-grained super pages */
 };
 
 struct vfio_dma {
@@ -350,8 +351,8 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 		iommu_unmap(d->domain, dma->iova, dma->size);
 
 	while (iova < end) {
-		size_t unmapped;
-		phys_addr_t phys;
+		size_t unmapped, len;
+		phys_addr_t phys, next;
 
 		phys = iommu_iova_to_phys(domain->domain, iova);
 		if (WARN_ON(!phys)) {
@@ -359,7 +360,19 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 			continue;
 		}
 
-		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+		/*
+		 * To optimize for fewer iommu_unmap() calls, each of which
+		 * may require hardware cache flushing, try to find the
+		 * largest contiguous physical memory chunk to unmap.
+		 */
+		for (len = PAGE_SIZE;
+		     !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+			next = iommu_iova_to_phys(domain->domain, iova + len);
+			if (next != phys + len)
+				break;
+		}
+
+		unmapped = iommu_unmap(domain->domain, iova, len);
 		if (WARN_ON(!unmapped))
 			break;
 
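The loop added above coalesces physically contiguous pages so that a whole run is torn down with a single iommu_unmap() call instead of one call per page. As an illustration only (not part of the patch), the same scan-and-merge pattern can be sketched as a standalone userspace C program; lookup_phys() and do_unmap() are hypothetical stand-ins for iommu_iova_to_phys() and iommu_unmap(), and the pfn[] table is made-up data. In the kernel the scan is skipped entirely when domain->fgsp is set.

/* Standalone sketch of the chunked-unmap scan; hypothetical helpers. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Hypothetical page table: pfn[i] is the physical frame backing page i. */
static const uint64_t pfn[] = { 100, 101, 102, 200, 300, 301 };

static uint64_t lookup_phys(uint64_t iova)
{
	return pfn[iova / PAGE_SIZE] * PAGE_SIZE;
}

static void do_unmap(uint64_t iova, uint64_t len)
{
	printf("unmap iova 0x%llx len 0x%llx\n",
	       (unsigned long long)iova, (unsigned long long)len);
}

int main(void)
{
	uint64_t iova = 0, end = sizeof(pfn) / sizeof(pfn[0]) * PAGE_SIZE;

	while (iova < end) {
		uint64_t phys = lookup_phys(iova);
		uint64_t len;

		/* Extend the chunk while the next page is physically adjacent. */
		for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) {
			if (lookup_phys(iova + len) != phys + len)
				break;
		}

		do_unmap(iova, len);	/* one call covers the whole contiguous run */
		iova += len;
	}
	return 0;
}

With the sample pfn[] above this emits three unmap calls (12K, 4K, and 8K) instead of six page-sized ones, which is the effect the patch is after.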
@@ -665,6 +678,39 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	return 0;
 }
 
+/*
+ * We change our unmap behavior slightly depending on whether the IOMMU
+ * supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage
+ * for practically any contiguous power-of-two mapping we give it. This means
+ * we don't need to look for contiguous chunks ourselves to make unmapping
+ * more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d
+ * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
+ * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+ * hugetlbfs is in use.
+ */
+static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+{
+	struct page *pages;
+	int ret, order = get_order(PAGE_SIZE * 2);
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!pages)
+		return;
+
+	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+			IOMMU_READ | IOMMU_WRITE | domain->prot);
+	if (!ret) {
+		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+
+		if (unmapped == PAGE_SIZE)
+			iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+		else
+			domain->fgsp = true;
+	}
+
+	__free_pages(pages, order);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
@@ -758,6 +804,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		}
 	}
 
+	vfio_test_domain_fgsp(domain);
+
 	/* replay mappings on new domains */
 	ret = vfio_iommu_replay(iommu, domain);
 	if (ret)
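For reference, the vfio_test_domain_fgsp() probe keys off how much iommu_unmap() reports back: after mapping two physically contiguous pages, unmapping only the first returns PAGE_SIZE on a page-granular (VT-d-style) domain, but the full two pages when the IOMMU folded the mapping into a superpage it must tear down as a unit (AMD-Vi-style), in which case fgsp is set. The following standalone sketch (not part of the patch) models that decision with a hypothetical mock_unmap(); the uses_superpage flag is a stand-in for the hardware behavior.

/* Standalone sketch of the fgsp probe decision; mock_unmap() is hypothetical. */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Model: a superpage-folding IOMMU must unmap the whole 2-page mapping at once. */
static unsigned long mock_unmap(unsigned long len, bool uses_superpage)
{
	return uses_superpage ? 2 * PAGE_SIZE : len;
}

static bool probe_fgsp(bool uses_superpage)
{
	unsigned long unmapped = mock_unmap(PAGE_SIZE, uses_superpage);

	if (unmapped == PAGE_SIZE) {
		/* Page-granular unmap: clean up the second page ourselves. */
		mock_unmap(PAGE_SIZE, uses_superpage);
		return false;
	}
	/* More than the requested page came out: fine-grained superpages. */
	return true;
}

int main(void)
{
	printf("page-granular (VT-d-like) domain:  fgsp=%d\n", probe_fgsp(false));
	printf("superpage-folding (AMD-Vi-like):   fgsp=%d\n", probe_fgsp(true));
	return 0;
}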