about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Joerg Roedel <joerg.roedel@amd.com> 2010-05-11 11:40:57 -0400
committer: Joerg Roedel <joerg.roedel@amd.com> 2010-05-11 11:40:57 -0400
commit: 795e74f7a69f9c08afa4fa7c86cc4f18a62bd630 (patch)
tree: 8448ece35101d8db945c49df50d0d5889687de9f
parent: a52357259680fe5368c2fabf5949209e231f2aa2 (diff)
parent: 12c7389abe5786349d3ea6da1961cf78d0c1c7cd (diff)
Merge branch 'iommu/largepages' into amd-iommu/2.6.35
Conflicts: arch/x86/kernel/amd_iommu.c
-rw-r--r-- arch/x86/include/asm/amd_iommu_types.h 34
-rw-r--r-- arch/x86/kernel/amd_iommu.c 197
-rw-r--r-- drivers/base/iommu.c 43
-rw-r--r-- drivers/pci/intel-iommu.c 22
-rw-r--r-- include/linux/iommu.h 24
-rw-r--r-- virt/kvm/iommu.c 113
6 files changed, 304 insertions, 129 deletions
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 86a0ff0aeac7..7014e88bc779 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -174,6 +174,40 @@
174 (~((1ULL << (12 + ((lvl) * 9))) - 1))) 174 (~((1ULL << (12 + ((lvl) * 9))) - 1)))
175#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) 175#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
176 176
177/*
178 * Returns the page table level to use for a given page size
179 * Pagesize is expected to be a power-of-two
180 */
181#define PAGE_SIZE_LEVEL(pagesize) \
182 ((__ffs(pagesize) - 12) / 9)
183/*
184 * Returns the number of ptes to use for a given page size
185 * Pagesize is expected to be a power-of-two
186 */
187#define PAGE_SIZE_PTE_COUNT(pagesize) \
188 (1ULL << ((__ffs(pagesize) - 12) % 9))
189
190/*
191 * Aligns a given io-virtual address to a given page size
192 * Pagesize is expected to be a power-of-two
193 */
194#define PAGE_SIZE_ALIGN(address, pagesize) \
195 ((address) & ~((pagesize) - 1))
196/*
197 * Creates an IOMMU PTE for an address and a given pagesize
198 * The PTE has no permission bits set
199 * Pagesize is expected to be a power-of-two larger than 4096
200 */
201#define PAGE_SIZE_PTE(address, pagesize) \
202 (((address) | ((pagesize) - 1)) & \
203 (~(pagesize >> 1)) & PM_ADDR_MASK)
204
205/*
206 * Takes a PTE value with mode=0x07 and returns the page size it maps
207 */
208#define PTE_PAGE_SIZE(pte) \
209 (1ULL << (1 + ffz(((pte) | 0xfffULL))))
210
177#define IOMMU_PTE_P (1ULL << 0) 211#define IOMMU_PTE_P (1ULL << 0)
178#define IOMMU_PTE_TV (1ULL << 1) 212#define IOMMU_PTE_TV (1ULL << 1)
179#define IOMMU_PTE_U (1ULL << 59) 213#define IOMMU_PTE_U (1ULL << 59)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f854d89b7edf..fa5a1474cd18 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -731,18 +731,22 @@ static bool increase_address_space(struct protection_domain *domain,
731 731
732static u64 *alloc_pte(struct protection_domain *domain, 732static u64 *alloc_pte(struct protection_domain *domain,
733 unsigned long address, 733 unsigned long address,
734 int end_lvl, 734 unsigned long page_size,
735 u64 **pte_page, 735 u64 **pte_page,
736 gfp_t gfp) 736 gfp_t gfp)
737{ 737{
738 int level, end_lvl;
738 u64 *pte, *page; 739 u64 *pte, *page;
739 int level; 740
741 BUG_ON(!is_power_of_2(page_size));
740 742
741 while (address > PM_LEVEL_SIZE(domain->mode)) 743 while (address > PM_LEVEL_SIZE(domain->mode))
742 increase_address_space(domain, gfp); 744 increase_address_space(domain, gfp);
743 745
744 level = domain->mode - 1; 746 level = domain->mode - 1;
745 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; 747 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
748 address = PAGE_SIZE_ALIGN(address, page_size);
749 end_lvl = PAGE_SIZE_LEVEL(page_size);
746 750
747 while (level > end_lvl) { 751 while (level > end_lvl) {
748 if (!IOMMU_PTE_PRESENT(*pte)) { 752 if (!IOMMU_PTE_PRESENT(*pte)) {
@@ -752,6 +756,10 @@ static u64 *alloc_pte(struct protection_domain *domain,
752 *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); 756 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
753 } 757 }
754 758
759 /* No level skipping support yet */
760 if (PM_PTE_LEVEL(*pte) != level)
761 return NULL;
762
755 level -= 1; 763 level -= 1;
756 764
757 pte = IOMMU_PTE_PAGE(*pte); 765 pte = IOMMU_PTE_PAGE(*pte);
@@ -769,28 +777,47 @@ static u64 *alloc_pte(struct protection_domain *domain,
769 * This function checks if there is a PTE for a given dma address. If 777 * This function checks if there is a PTE for a given dma address. If
770 * there is one, it returns the pointer to it. 778 * there is one, it returns the pointer to it.
771 */ 779 */
772static u64 *fetch_pte(struct protection_domain *domain, 780static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
773 unsigned long address, int map_size)
774{ 781{
775 int level; 782 int level;
776 u64 *pte; 783 u64 *pte;
777 784
778 level = domain->mode - 1; 785 if (address > PM_LEVEL_SIZE(domain->mode))
779 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; 786 return NULL;
787
788 level = domain->mode - 1;
789 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
780 790
781 while (level > map_size) { 791 while (level > 0) {
792
793 /* Not Present */
782 if (!IOMMU_PTE_PRESENT(*pte)) 794 if (!IOMMU_PTE_PRESENT(*pte))
783 return NULL; 795 return NULL;
784 796
797 /* Large PTE */
798 if (PM_PTE_LEVEL(*pte) == 0x07) {
799 unsigned long pte_mask, __pte;
800
801 /*
802 * If we have a series of large PTEs, make
803 * sure to return a pointer to the first one.
804 */
805 pte_mask = PTE_PAGE_SIZE(*pte);
806 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
807 __pte = ((unsigned long)pte) & pte_mask;
808
809 return (u64 *)__pte;
810 }
811
812 /* No level skipping support yet */
813 if (PM_PTE_LEVEL(*pte) != level)
814 return NULL;
815
785 level -= 1; 816 level -= 1;
786 817
818 /* Walk to the next level */
787 pte = IOMMU_PTE_PAGE(*pte); 819 pte = IOMMU_PTE_PAGE(*pte);
788 pte = &pte[PM_LEVEL_INDEX(level, address)]; 820 pte = &pte[PM_LEVEL_INDEX(level, address)];
789
790 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
791 pte = NULL;
792 break;
793 }
794 } 821 }
795 822
796 return pte; 823 return pte;
@@ -807,44 +834,84 @@ static int iommu_map_page(struct protection_domain *dom,
807 unsigned long bus_addr, 834 unsigned long bus_addr,
808 unsigned long phys_addr, 835 unsigned long phys_addr,
809 int prot, 836 int prot,
810 int map_size) 837 unsigned long page_size)
811{ 838{
812 u64 __pte, *pte; 839 u64 __pte, *pte;
813 840 int i, count;
814 bus_addr = PAGE_ALIGN(bus_addr);
815 phys_addr = PAGE_ALIGN(phys_addr);
816
817 BUG_ON(!PM_ALIGNED(map_size, bus_addr));
818 BUG_ON(!PM_ALIGNED(map_size, phys_addr));
819 841
820 if (!(prot & IOMMU_PROT_MASK)) 842 if (!(prot & IOMMU_PROT_MASK))
821 return -EINVAL; 843 return -EINVAL;
822 844
823 pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); 845 bus_addr = PAGE_ALIGN(bus_addr);
846 phys_addr = PAGE_ALIGN(phys_addr);
847 count = PAGE_SIZE_PTE_COUNT(page_size);
848 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
849
850 for (i = 0; i < count; ++i)
851 if (IOMMU_PTE_PRESENT(pte[i]))
852 return -EBUSY;
824 853
825 if (IOMMU_PTE_PRESENT(*pte)) 854 if (page_size > PAGE_SIZE) {
826 return -EBUSY; 855 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
856 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
857 } else
858 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
827 859
828 __pte = phys_addr | IOMMU_PTE_P;
829 if (prot & IOMMU_PROT_IR) 860 if (prot & IOMMU_PROT_IR)
830 __pte |= IOMMU_PTE_IR; 861 __pte |= IOMMU_PTE_IR;
831 if (prot & IOMMU_PROT_IW) 862 if (prot & IOMMU_PROT_IW)
832 __pte |= IOMMU_PTE_IW; 863 __pte |= IOMMU_PTE_IW;
833 864
834 *pte = __pte; 865 for (i = 0; i < count; ++i)
866 pte[i] = __pte;
835 867
836 update_domain(dom); 868 update_domain(dom);
837 869
838 return 0; 870 return 0;
839} 871}
840 872
841static void iommu_unmap_page(struct protection_domain *dom, 873static unsigned long iommu_unmap_page(struct protection_domain *dom,
842 unsigned long bus_addr, int map_size) 874 unsigned long bus_addr,
875 unsigned long page_size)
843{ 876{
844 u64 *pte = fetch_pte(dom, bus_addr, map_size); 877 unsigned long long unmap_size, unmapped;
878 u64 *pte;
879
880 BUG_ON(!is_power_of_2(page_size));
881
882 unmapped = 0;
845 883
846 if (pte) 884 while (unmapped < page_size) {
847 *pte = 0; 885
886 pte = fetch_pte(dom, bus_addr);
887
888 if (!pte) {
889 /*
890 * No PTE for this address
891 * move forward in 4kb steps
892 */
893 unmap_size = PAGE_SIZE;
894 } else if (PM_PTE_LEVEL(*pte) == 0) {
895 /* 4kb PTE found for this address */
896 unmap_size = PAGE_SIZE;
897 *pte = 0ULL;
898 } else {
899 int count, i;
900
901 /* Large PTE found which maps this address */
902 unmap_size = PTE_PAGE_SIZE(*pte);
903 count = PAGE_SIZE_PTE_COUNT(unmap_size);
904 for (i = 0; i < count; i++)
905 pte[i] = 0ULL;
906 }
907
908 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
909 unmapped += unmap_size;
910 }
911
912 BUG_ON(!is_power_of_2(unmapped));
913
914 return unmapped;
848} 915}
849 916
850/* 917/*
@@ -878,7 +945,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
878 for (addr = e->address_start; addr < e->address_end; 945 for (addr = e->address_start; addr < e->address_end;
879 addr += PAGE_SIZE) { 946 addr += PAGE_SIZE) {
880 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, 947 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
881 PM_MAP_4k); 948 PAGE_SIZE);
882 if (ret) 949 if (ret)
883 return ret; 950 return ret;
884 /* 951 /*
@@ -1006,7 +1073,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
1006 u64 *pte, *pte_page; 1073 u64 *pte, *pte_page;
1007 1074
1008 for (i = 0; i < num_ptes; ++i) { 1075 for (i = 0; i < num_ptes; ++i) {
1009 pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, 1076 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1010 &pte_page, gfp); 1077 &pte_page, gfp);
1011 if (!pte) 1078 if (!pte)
1012 goto out_free; 1079 goto out_free;
@@ -1042,7 +1109,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
1042 for (i = dma_dom->aperture[index]->offset; 1109 for (i = dma_dom->aperture[index]->offset;
1043 i < dma_dom->aperture_size; 1110 i < dma_dom->aperture_size;
1044 i += PAGE_SIZE) { 1111 i += PAGE_SIZE) {
1045 u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); 1112 u64 *pte = fetch_pte(&dma_dom->domain, i);
1046 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 1113 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1047 continue; 1114 continue;
1048 1115
@@ -1712,7 +1779,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1712 1779
1713 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; 1780 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1714 if (!pte) { 1781 if (!pte) {
1715 pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, 1782 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1716 GFP_ATOMIC); 1783 GFP_ATOMIC);
1717 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; 1784 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1718 } else 1785 } else
@@ -2439,12 +2506,11 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
2439 return ret; 2506 return ret;
2440} 2507}
2441 2508
2442static int amd_iommu_map_range(struct iommu_domain *dom, 2509static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2443 unsigned long iova, phys_addr_t paddr, 2510 phys_addr_t paddr, int gfp_order, int iommu_prot)
2444 size_t size, int iommu_prot)
2445{ 2511{
2512 unsigned long page_size = 0x1000UL << gfp_order;
2446 struct protection_domain *domain = dom->priv; 2513 struct protection_domain *domain = dom->priv;
2447 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
2448 int prot = 0; 2514 int prot = 0;
2449 int ret; 2515 int ret;
2450 2516
@@ -2453,61 +2519,50 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
2453 if (iommu_prot & IOMMU_WRITE) 2519 if (iommu_prot & IOMMU_WRITE)
2454 prot |= IOMMU_PROT_IW; 2520 prot |= IOMMU_PROT_IW;
2455 2521
2456 iova &= PAGE_MASK;
2457 paddr &= PAGE_MASK;
2458
2459 mutex_lock(&domain->api_lock); 2522 mutex_lock(&domain->api_lock);
2460 2523 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2461 for (i = 0; i < npages; ++i) {
2462 ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
2463 if (ret)
2464 return ret;
2465
2466 iova += PAGE_SIZE;
2467 paddr += PAGE_SIZE;
2468 }
2469
2470 mutex_unlock(&domain->api_lock); 2524 mutex_unlock(&domain->api_lock);
2471 2525
2472 return 0; 2526 return ret;
2473} 2527}
2474 2528
2475static void amd_iommu_unmap_range(struct iommu_domain *dom, 2529static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2476 unsigned long iova, size_t size) 2530 int gfp_order)
2477{ 2531{
2478
2479 struct protection_domain *domain = dom->priv; 2532 struct protection_domain *domain = dom->priv;
2480 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); 2533 unsigned long page_size, unmap_size;
2481 2534
2482 iova &= PAGE_MASK; 2535 page_size = 0x1000UL << gfp_order;
2483 2536
2484 mutex_lock(&domain->api_lock); 2537 mutex_lock(&domain->api_lock);
2485 2538 unmap_size = iommu_unmap_page(domain, iova, page_size);
2486 for (i = 0; i < npages; ++i) { 2539 mutex_unlock(&domain->api_lock);
2487 iommu_unmap_page(domain, iova, PM_MAP_4k);
2488 iova += PAGE_SIZE;
2489 }
2490 2540
2491 iommu_flush_tlb_pde(domain); 2541 iommu_flush_tlb_pde(domain);
2492 2542
2493 mutex_unlock(&domain->api_lock); 2543 return get_order(unmap_size);
2494} 2544}
2495 2545
2496static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2546static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2497 unsigned long iova) 2547 unsigned long iova)
2498{ 2548{
2499 struct protection_domain *domain = dom->priv; 2549 struct protection_domain *domain = dom->priv;
2500 unsigned long offset = iova & ~PAGE_MASK; 2550 unsigned long offset_mask;
2501 phys_addr_t paddr; 2551 phys_addr_t paddr;
2502 u64 *pte; 2552 u64 *pte, __pte;
2503 2553
2504 pte = fetch_pte(domain, iova, PM_MAP_4k); 2554 pte = fetch_pte(domain, iova);
2505 2555
2506 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 2556 if (!pte || !IOMMU_PTE_PRESENT(*pte))
2507 return 0; 2557 return 0;
2508 2558
2509 paddr = *pte & IOMMU_PAGE_MASK; 2559 if (PM_PTE_LEVEL(*pte) == 0)
2510 paddr |= offset; 2560 offset_mask = PAGE_SIZE - 1;
2561 else
2562 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2563
2564 __pte = *pte & PM_ADDR_MASK;
2565 paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2511 2566
2512 return paddr; 2567 return paddr;
2513} 2568}
@@ -2523,8 +2578,8 @@ static struct iommu_ops amd_iommu_ops = {
2523 .domain_destroy = amd_iommu_domain_destroy, 2578 .domain_destroy = amd_iommu_domain_destroy,
2524 .attach_dev = amd_iommu_attach_device, 2579 .attach_dev = amd_iommu_attach_device,
2525 .detach_dev = amd_iommu_detach_device, 2580 .detach_dev = amd_iommu_detach_device,
2526 .map = amd_iommu_map_range, 2581 .map = amd_iommu_map,
2527 .unmap = amd_iommu_unmap_range, 2582 .unmap = amd_iommu_unmap,
2528 .iova_to_phys = amd_iommu_iova_to_phys, 2583 .iova_to_phys = amd_iommu_iova_to_phys,
2529 .domain_has_cap = amd_iommu_domain_has_cap, 2584 .domain_has_cap = amd_iommu_domain_has_cap,
2530}; 2585};
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index 8ad4ffea6920..6e6b6a11b3ce 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -80,20 +80,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
80} 80}
81EXPORT_SYMBOL_GPL(iommu_detach_device); 81EXPORT_SYMBOL_GPL(iommu_detach_device);
82 82
83int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
84 phys_addr_t paddr, size_t size, int prot)
85{
86 return iommu_ops->map(domain, iova, paddr, size, prot);
87}
88EXPORT_SYMBOL_GPL(iommu_map_range);
89
90void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
91 size_t size)
92{
93 iommu_ops->unmap(domain, iova, size);
94}
95EXPORT_SYMBOL_GPL(iommu_unmap_range);
96
97phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 83phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
98 unsigned long iova) 84 unsigned long iova)
99{ 85{
@@ -107,3 +93,32 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
107 return iommu_ops->domain_has_cap(domain, cap); 93 return iommu_ops->domain_has_cap(domain, cap);
108} 94}
109EXPORT_SYMBOL_GPL(iommu_domain_has_cap); 95EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
96
97int iommu_map(struct iommu_domain *domain, unsigned long iova,
98 phys_addr_t paddr, int gfp_order, int prot)
99{
100 unsigned long invalid_mask;
101 size_t size;
102
103 size = 0x1000UL << gfp_order;
104 invalid_mask = size - 1;
105
106 BUG_ON((iova | paddr) & invalid_mask);
107
108 return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
109}
110EXPORT_SYMBOL_GPL(iommu_map);
111
112int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
113{
114 unsigned long invalid_mask;
115 size_t size;
116
117 size = 0x1000UL << gfp_order;
118 invalid_mask = size - 1;
119
120 BUG_ON(iova & invalid_mask);
121
122 return iommu_ops->unmap(domain, iova, gfp_order);
123}
124EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 417312528ddf..371dc564e2e4 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3626,14 +3626,15 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
3626 domain_remove_one_dev_info(dmar_domain, pdev); 3626 domain_remove_one_dev_info(dmar_domain, pdev);
3627} 3627}
3628 3628
3629static int intel_iommu_map_range(struct iommu_domain *domain, 3629static int intel_iommu_map(struct iommu_domain *domain,
3630 unsigned long iova, phys_addr_t hpa, 3630 unsigned long iova, phys_addr_t hpa,
3631 size_t size, int iommu_prot) 3631 int gfp_order, int iommu_prot)
3632{ 3632{
3633 struct dmar_domain *dmar_domain = domain->priv; 3633 struct dmar_domain *dmar_domain = domain->priv;
3634 u64 max_addr; 3634 u64 max_addr;
3635 int addr_width; 3635 int addr_width;
3636 int prot = 0; 3636 int prot = 0;
3637 size_t size;
3637 int ret; 3638 int ret;
3638 3639
3639 if (iommu_prot & IOMMU_READ) 3640 if (iommu_prot & IOMMU_READ)
@@ -3643,6 +3644,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3643 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3644 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3644 prot |= DMA_PTE_SNP; 3645 prot |= DMA_PTE_SNP;
3645 3646
3647 size = PAGE_SIZE << gfp_order;
3646 max_addr = iova + size; 3648 max_addr = iova + size;
3647 if (dmar_domain->max_addr < max_addr) { 3649 if (dmar_domain->max_addr < max_addr) {
3648 int min_agaw; 3650 int min_agaw;
@@ -3669,19 +3671,19 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3669 return ret; 3671 return ret;
3670} 3672}
3671 3673
3672static void intel_iommu_unmap_range(struct iommu_domain *domain, 3674static int intel_iommu_unmap(struct iommu_domain *domain,
3673 unsigned long iova, size_t size) 3675 unsigned long iova, int gfp_order)
3674{ 3676{
3675 struct dmar_domain *dmar_domain = domain->priv; 3677 struct dmar_domain *dmar_domain = domain->priv;
3676 3678 size_t size = PAGE_SIZE << gfp_order;
3677 if (!size)
3678 return;
3679 3679
3680 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, 3680 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3681 (iova + size - 1) >> VTD_PAGE_SHIFT); 3681 (iova + size - 1) >> VTD_PAGE_SHIFT);
3682 3682
3683 if (dmar_domain->max_addr == iova + size) 3683 if (dmar_domain->max_addr == iova + size)
3684 dmar_domain->max_addr = iova; 3684 dmar_domain->max_addr = iova;
3685
3686 return gfp_order;
3685} 3687}
3686 3688
3687static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3689static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3714,8 +3716,8 @@ static struct iommu_ops intel_iommu_ops = {
3714 .domain_destroy = intel_iommu_domain_destroy, 3716 .domain_destroy = intel_iommu_domain_destroy,
3715 .attach_dev = intel_iommu_attach_device, 3717 .attach_dev = intel_iommu_attach_device,
3716 .detach_dev = intel_iommu_detach_device, 3718 .detach_dev = intel_iommu_detach_device,
3717 .map = intel_iommu_map_range, 3719 .map = intel_iommu_map,
3718 .unmap = intel_iommu_unmap_range, 3720 .unmap = intel_iommu_unmap,
3719 .iova_to_phys = intel_iommu_iova_to_phys, 3721 .iova_to_phys = intel_iommu_iova_to_phys,
3720 .domain_has_cap = intel_iommu_domain_has_cap, 3722 .domain_has_cap = intel_iommu_domain_has_cap,
3721}; 3723};
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3af4ffd591b9..be22ad83689c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -37,9 +37,9 @@ struct iommu_ops {
37 int (*attach_dev)(struct iommu_domain *domain, struct device *dev); 37 int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
38 void (*detach_dev)(struct iommu_domain *domain, struct device *dev); 38 void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
39 int (*map)(struct iommu_domain *domain, unsigned long iova, 39 int (*map)(struct iommu_domain *domain, unsigned long iova,
40 phys_addr_t paddr, size_t size, int prot); 40 phys_addr_t paddr, int gfp_order, int prot);
41 void (*unmap)(struct iommu_domain *domain, unsigned long iova, 41 int (*unmap)(struct iommu_domain *domain, unsigned long iova,
42 size_t size); 42 int gfp_order);
43 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, 43 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
44 unsigned long iova); 44 unsigned long iova);
45 int (*domain_has_cap)(struct iommu_domain *domain, 45 int (*domain_has_cap)(struct iommu_domain *domain,
@@ -56,10 +56,10 @@ extern int iommu_attach_device(struct iommu_domain *domain,
56 struct device *dev); 56 struct device *dev);
57extern void iommu_detach_device(struct iommu_domain *domain, 57extern void iommu_detach_device(struct iommu_domain *domain,
58 struct device *dev); 58 struct device *dev);
59extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, 59extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
60 phys_addr_t paddr, size_t size, int prot); 60 phys_addr_t paddr, int gfp_order, int prot);
61extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, 61extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
62 size_t size); 62 int gfp_order);
63extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 63extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
64 unsigned long iova); 64 unsigned long iova);
65extern int iommu_domain_has_cap(struct iommu_domain *domain, 65extern int iommu_domain_has_cap(struct iommu_domain *domain,
@@ -96,16 +96,16 @@ static inline void iommu_detach_device(struct iommu_domain *domain,
96{ 96{
97} 97}
98 98
99static inline int iommu_map_range(struct iommu_domain *domain, 99static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
100 unsigned long iova, phys_addr_t paddr, 100 phys_addr_t paddr, int gfp_order, int prot)
101 size_t size, int prot)
102{ 101{
103 return -ENODEV; 102 return -ENODEV;
104} 103}
105 104
106static inline void iommu_unmap_range(struct iommu_domain *domain, 105static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
107 unsigned long iova, size_t size) 106 int gfp_order)
108{ 107{
108 return -ENODEV;
109} 109}
110 110
111static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 111static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 80fd3ad3b2de..11692b9e8830 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,12 +32,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
32static void kvm_iommu_put_pages(struct kvm *kvm, 32static void kvm_iommu_put_pages(struct kvm *kvm,
33 gfn_t base_gfn, unsigned long npages); 33 gfn_t base_gfn, unsigned long npages);
34 34
35static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
36 gfn_t gfn, unsigned long size)
37{
38 gfn_t end_gfn;
39 pfn_t pfn;
40
41 pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
42 end_gfn = gfn + (size >> PAGE_SHIFT);
43 gfn += 1;
44
45 if (is_error_pfn(pfn))
46 return pfn;
47
48 while (gfn < end_gfn)
49 gfn_to_pfn_memslot(kvm, slot, gfn++);
50
51 return pfn;
52}
53
35int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) 54int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
36{ 55{
37 gfn_t gfn = slot->base_gfn; 56 gfn_t gfn, end_gfn;
38 unsigned long npages = slot->npages;
39 pfn_t pfn; 57 pfn_t pfn;
40 int i, r = 0; 58 int r = 0;
41 struct iommu_domain *domain = kvm->arch.iommu_domain; 59 struct iommu_domain *domain = kvm->arch.iommu_domain;
42 int flags; 60 int flags;
43 61
@@ -45,31 +63,62 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
45 if (!domain) 63 if (!domain)
46 return 0; 64 return 0;
47 65
66 gfn = slot->base_gfn;
67 end_gfn = gfn + slot->npages;
68
48 flags = IOMMU_READ | IOMMU_WRITE; 69 flags = IOMMU_READ | IOMMU_WRITE;
49 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) 70 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
50 flags |= IOMMU_CACHE; 71 flags |= IOMMU_CACHE;
51 72
52 for (i = 0; i < npages; i++) { 73
53 /* check if already mapped */ 74 while (gfn < end_gfn) {
54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 75 unsigned long page_size;
76
77 /* Check if already mapped */
78 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
79 gfn += 1;
80 continue;
81 }
82
83 /* Get the page size we could use to map */
84 page_size = kvm_host_page_size(kvm, gfn);
85
86 /* Make sure the page_size does not exceed the memslot */
87 while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
88 page_size >>= 1;
89
90 /* Make sure gfn is aligned to the page size we want to map */
91 while ((gfn << PAGE_SHIFT) & (page_size - 1))
92 page_size >>= 1;
93
94 /*
95 * Pin all pages we are about to map in memory. This is
96 * important because we unmap and unpin in 4kb steps later.
97 */
98 pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
99 if (is_error_pfn(pfn)) {
100 gfn += 1;
55 continue; 101 continue;
102 }
56 103
57 pfn = gfn_to_pfn_memslot(kvm, slot, gfn); 104 /* Map into IO address space */
58 r = iommu_map_range(domain, 105 r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
59 gfn_to_gpa(gfn), 106 get_order(page_size), flags);
60 pfn_to_hpa(pfn),
61 PAGE_SIZE, flags);
62 if (r) { 107 if (r) {
63 printk(KERN_ERR "kvm_iommu_map_address:" 108 printk(KERN_ERR "kvm_iommu_map_address:"
64 "iommu failed to map pfn=%lx\n", pfn); 109 "iommu failed to map pfn=%lx\n", pfn);
65 goto unmap_pages; 110 goto unmap_pages;
66 } 111 }
67 gfn++; 112
113 gfn += page_size >> PAGE_SHIFT;
114
115
68 } 116 }
117
69 return 0; 118 return 0;
70 119
71unmap_pages: 120unmap_pages:
72 kvm_iommu_put_pages(kvm, slot->base_gfn, i); 121 kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
73 return r; 122 return r;
74} 123}
75 124
@@ -189,27 +238,47 @@ out_unmap:
189 return r; 238 return r;
190} 239}
191 240
241static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
242{
243 unsigned long i;
244
245 for (i = 0; i < npages; ++i)
246 kvm_release_pfn_clean(pfn + i);
247}
248
192static void kvm_iommu_put_pages(struct kvm *kvm, 249static void kvm_iommu_put_pages(struct kvm *kvm,
193 gfn_t base_gfn, unsigned long npages) 250 gfn_t base_gfn, unsigned long npages)
194{ 251{
195 gfn_t gfn = base_gfn; 252 struct iommu_domain *domain;
253 gfn_t end_gfn, gfn;
196 pfn_t pfn; 254 pfn_t pfn;
197 struct iommu_domain *domain = kvm->arch.iommu_domain;
198 unsigned long i;
199 u64 phys; 255 u64 phys;
200 256
257 domain = kvm->arch.iommu_domain;
258 end_gfn = base_gfn + npages;
259 gfn = base_gfn;
260
201 /* check if iommu exists and in use */ 261 /* check if iommu exists and in use */
202 if (!domain) 262 if (!domain)
203 return; 263 return;
204 264
205 for (i = 0; i < npages; i++) { 265 while (gfn < end_gfn) {
266 unsigned long unmap_pages;
267 int order;
268
269 /* Get physical address */
206 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); 270 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
207 pfn = phys >> PAGE_SHIFT; 271 pfn = phys >> PAGE_SHIFT;
208 kvm_release_pfn_clean(pfn); 272
209 gfn++; 273 /* Unmap address from IO address space */
210 } 274 order = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
275 unmap_pages = 1ULL << order;
211 276
212 iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); 277 /* Unpin all pages we just unmapped to not leak any memory */
278 kvm_unpin_pages(kvm, pfn, unmap_pages);
279
280 gfn += unmap_pages;
281 }
213} 282}
214 283
215static int kvm_iommu_unmap_memslots(struct kvm *kvm) 284static int kvm_iommu_unmap_memslots(struct kvm *kvm)