aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-18 10:22:37 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-18 10:22:37 -0400
commit8123d8f17d8ba9d67e556688e4f025456ca97842 (patch)
tree1d15088a32644e464ad3536ad7bec775050065eb
parent06ee772043c7ad125f2c2e6a08dc563706f39e8d (diff)
parent795e74f7a69f9c08afa4fa7c86cc4f18a62bd630 (diff)
Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86/amd-iommu: Add amd_iommu=off command line option iommu-api: Remove iommu_{un}map_range functions x86/amd-iommu: Implement ->{un}map callbacks for iommu-api x86/amd-iommu: Make amd_iommu_iova_to_phys aware of multiple page sizes x86/amd-iommu: Make iommu_unmap_page and fetch_pte aware of page sizes x86/amd-iommu: Make iommu_map_page and alloc_pte aware of page sizes kvm: Change kvm_iommu_map_pages to map large pages VT-d: Change {un}map_range functions to implement {un}map interface iommu-api: Add ->{un}map callbacks to iommu_ops iommu-api: Add iommu_map and iommu_unmap functions iommu-api: Rename ->{un}map function pointers to ->{un}map_range
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h34
-rw-r--r--arch/x86/kernel/amd_iommu.c197
-rw-r--r--arch/x86/kernel/amd_iommu_init.c6
-rw-r--r--drivers/base/iommu.c43
-rw-r--r--drivers/pci/intel-iommu.c22
-rw-r--r--include/linux/iommu.h24
-rw-r--r--virt/kvm/iommu.c113
8 files changed, 312 insertions, 129 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 839b21b0699a..0c6c56076d19 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -324,6 +324,8 @@ and is between 256 and 4096 characters. It is defined in the file
324 they are unmapped. Otherwise they are 324 they are unmapped. Otherwise they are
325 flushed before they will be reused, which 325 flushed before they will be reused, which
326 is a lot of faster 326 is a lot of faster
327 off - do not initialize any AMD IOMMU found in
328 the system
327 329
328 amijoy.map= [HW,JOY] Amiga joystick support 330 amijoy.map= [HW,JOY] Amiga joystick support
329 Map of devices attached to JOY0DAT and JOY1DAT 331 Map of devices attached to JOY0DAT and JOY1DAT
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 86a0ff0aeac7..7014e88bc779 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -174,6 +174,40 @@
174 (~((1ULL << (12 + ((lvl) * 9))) - 1))) 174 (~((1ULL << (12 + ((lvl) * 9))) - 1)))
175#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) 175#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
176 176
177/*
178 * Returns the page table level to use for a given page size
179 * Pagesize is expected to be a power-of-two
180 */
181#define PAGE_SIZE_LEVEL(pagesize) \
182 ((__ffs(pagesize) - 12) / 9)
183/*
184 * Returns the number of ptes to use for a given page size
185 * Pagesize is expected to be a power-of-two
186 */
187#define PAGE_SIZE_PTE_COUNT(pagesize) \
188 (1ULL << ((__ffs(pagesize) - 12) % 9))
189
190/*
191 * Aligns a given io-virtual address to a given page size
192 * Pagesize is expected to be a power-of-two
193 */
194#define PAGE_SIZE_ALIGN(address, pagesize) \
195 ((address) & ~((pagesize) - 1))
196/*
197 * Creates an IOMMU PTE for an address an a given pagesize
198 * The PTE has no permission bits set
199 * Pagesize is expected to be a power-of-two larger than 4096
200 */
201#define PAGE_SIZE_PTE(address, pagesize) \
202 (((address) | ((pagesize) - 1)) & \
203 (~(pagesize >> 1)) & PM_ADDR_MASK)
204
205/*
206 * Takes a PTE value with mode=0x07 and returns the page size it maps
207 */
208#define PTE_PAGE_SIZE(pte) \
209 (1ULL << (1 + ffz(((pte) | 0xfffULL))))
210
177#define IOMMU_PTE_P (1ULL << 0) 211#define IOMMU_PTE_P (1ULL << 0)
178#define IOMMU_PTE_TV (1ULL << 1) 212#define IOMMU_PTE_TV (1ULL << 1)
179#define IOMMU_PTE_U (1ULL << 59) 213#define IOMMU_PTE_U (1ULL << 59)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f854d89b7edf..fa5a1474cd18 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -731,18 +731,22 @@ static bool increase_address_space(struct protection_domain *domain,
731 731
732static u64 *alloc_pte(struct protection_domain *domain, 732static u64 *alloc_pte(struct protection_domain *domain,
733 unsigned long address, 733 unsigned long address,
734 int end_lvl, 734 unsigned long page_size,
735 u64 **pte_page, 735 u64 **pte_page,
736 gfp_t gfp) 736 gfp_t gfp)
737{ 737{
738 int level, end_lvl;
738 u64 *pte, *page; 739 u64 *pte, *page;
739 int level; 740
741 BUG_ON(!is_power_of_2(page_size));
740 742
741 while (address > PM_LEVEL_SIZE(domain->mode)) 743 while (address > PM_LEVEL_SIZE(domain->mode))
742 increase_address_space(domain, gfp); 744 increase_address_space(domain, gfp);
743 745
744 level = domain->mode - 1; 746 level = domain->mode - 1;
745 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; 747 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
748 address = PAGE_SIZE_ALIGN(address, page_size);
749 end_lvl = PAGE_SIZE_LEVEL(page_size);
746 750
747 while (level > end_lvl) { 751 while (level > end_lvl) {
748 if (!IOMMU_PTE_PRESENT(*pte)) { 752 if (!IOMMU_PTE_PRESENT(*pte)) {
@@ -752,6 +756,10 @@ static u64 *alloc_pte(struct protection_domain *domain,
752 *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); 756 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
753 } 757 }
754 758
759 /* No level skipping support yet */
760 if (PM_PTE_LEVEL(*pte) != level)
761 return NULL;
762
755 level -= 1; 763 level -= 1;
756 764
757 pte = IOMMU_PTE_PAGE(*pte); 765 pte = IOMMU_PTE_PAGE(*pte);
@@ -769,28 +777,47 @@ static u64 *alloc_pte(struct protection_domain *domain,
769 * This function checks if there is a PTE for a given dma address. If 777 * This function checks if there is a PTE for a given dma address. If
770 * there is one, it returns the pointer to it. 778 * there is one, it returns the pointer to it.
771 */ 779 */
772static u64 *fetch_pte(struct protection_domain *domain, 780static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
773 unsigned long address, int map_size)
774{ 781{
775 int level; 782 int level;
776 u64 *pte; 783 u64 *pte;
777 784
778 level = domain->mode - 1; 785 if (address > PM_LEVEL_SIZE(domain->mode))
779 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; 786 return NULL;
787
788 level = domain->mode - 1;
789 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
780 790
781 while (level > map_size) { 791 while (level > 0) {
792
793 /* Not Present */
782 if (!IOMMU_PTE_PRESENT(*pte)) 794 if (!IOMMU_PTE_PRESENT(*pte))
783 return NULL; 795 return NULL;
784 796
797 /* Large PTE */
798 if (PM_PTE_LEVEL(*pte) == 0x07) {
799 unsigned long pte_mask, __pte;
800
801 /*
802 * If we have a series of large PTEs, make
803 * sure to return a pointer to the first one.
804 */
805 pte_mask = PTE_PAGE_SIZE(*pte);
806 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
807 __pte = ((unsigned long)pte) & pte_mask;
808
809 return (u64 *)__pte;
810 }
811
812 /* No level skipping support yet */
813 if (PM_PTE_LEVEL(*pte) != level)
814 return NULL;
815
785 level -= 1; 816 level -= 1;
786 817
818 /* Walk to the next level */
787 pte = IOMMU_PTE_PAGE(*pte); 819 pte = IOMMU_PTE_PAGE(*pte);
788 pte = &pte[PM_LEVEL_INDEX(level, address)]; 820 pte = &pte[PM_LEVEL_INDEX(level, address)];
789
790 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
791 pte = NULL;
792 break;
793 }
794 } 821 }
795 822
796 return pte; 823 return pte;
@@ -807,44 +834,84 @@ static int iommu_map_page(struct protection_domain *dom,
807 unsigned long bus_addr, 834 unsigned long bus_addr,
808 unsigned long phys_addr, 835 unsigned long phys_addr,
809 int prot, 836 int prot,
810 int map_size) 837 unsigned long page_size)
811{ 838{
812 u64 __pte, *pte; 839 u64 __pte, *pte;
813 840 int i, count;
814 bus_addr = PAGE_ALIGN(bus_addr);
815 phys_addr = PAGE_ALIGN(phys_addr);
816
817 BUG_ON(!PM_ALIGNED(map_size, bus_addr));
818 BUG_ON(!PM_ALIGNED(map_size, phys_addr));
819 841
820 if (!(prot & IOMMU_PROT_MASK)) 842 if (!(prot & IOMMU_PROT_MASK))
821 return -EINVAL; 843 return -EINVAL;
822 844
823 pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); 845 bus_addr = PAGE_ALIGN(bus_addr);
846 phys_addr = PAGE_ALIGN(phys_addr);
847 count = PAGE_SIZE_PTE_COUNT(page_size);
848 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
849
850 for (i = 0; i < count; ++i)
851 if (IOMMU_PTE_PRESENT(pte[i]))
852 return -EBUSY;
824 853
825 if (IOMMU_PTE_PRESENT(*pte)) 854 if (page_size > PAGE_SIZE) {
826 return -EBUSY; 855 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
856 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
857 } else
858 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
827 859
828 __pte = phys_addr | IOMMU_PTE_P;
829 if (prot & IOMMU_PROT_IR) 860 if (prot & IOMMU_PROT_IR)
830 __pte |= IOMMU_PTE_IR; 861 __pte |= IOMMU_PTE_IR;
831 if (prot & IOMMU_PROT_IW) 862 if (prot & IOMMU_PROT_IW)
832 __pte |= IOMMU_PTE_IW; 863 __pte |= IOMMU_PTE_IW;
833 864
834 *pte = __pte; 865 for (i = 0; i < count; ++i)
866 pte[i] = __pte;
835 867
836 update_domain(dom); 868 update_domain(dom);
837 869
838 return 0; 870 return 0;
839} 871}
840 872
841static void iommu_unmap_page(struct protection_domain *dom, 873static unsigned long iommu_unmap_page(struct protection_domain *dom,
842 unsigned long bus_addr, int map_size) 874 unsigned long bus_addr,
875 unsigned long page_size)
843{ 876{
844 u64 *pte = fetch_pte(dom, bus_addr, map_size); 877 unsigned long long unmap_size, unmapped;
878 u64 *pte;
879
880 BUG_ON(!is_power_of_2(page_size));
881
882 unmapped = 0;
845 883
846 if (pte) 884 while (unmapped < page_size) {
847 *pte = 0; 885
886 pte = fetch_pte(dom, bus_addr);
887
888 if (!pte) {
889 /*
890 * No PTE for this address
891 * move forward in 4kb steps
892 */
893 unmap_size = PAGE_SIZE;
894 } else if (PM_PTE_LEVEL(*pte) == 0) {
895 /* 4kb PTE found for this address */
896 unmap_size = PAGE_SIZE;
897 *pte = 0ULL;
898 } else {
899 int count, i;
900
901 /* Large PTE found which maps this address */
902 unmap_size = PTE_PAGE_SIZE(*pte);
903 count = PAGE_SIZE_PTE_COUNT(unmap_size);
904 for (i = 0; i < count; i++)
905 pte[i] = 0ULL;
906 }
907
908 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
909 unmapped += unmap_size;
910 }
911
912 BUG_ON(!is_power_of_2(unmapped));
913
914 return unmapped;
848} 915}
849 916
850/* 917/*
@@ -878,7 +945,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
878 for (addr = e->address_start; addr < e->address_end; 945 for (addr = e->address_start; addr < e->address_end;
879 addr += PAGE_SIZE) { 946 addr += PAGE_SIZE) {
880 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, 947 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
881 PM_MAP_4k); 948 PAGE_SIZE);
882 if (ret) 949 if (ret)
883 return ret; 950 return ret;
884 /* 951 /*
@@ -1006,7 +1073,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
1006 u64 *pte, *pte_page; 1073 u64 *pte, *pte_page;
1007 1074
1008 for (i = 0; i < num_ptes; ++i) { 1075 for (i = 0; i < num_ptes; ++i) {
1009 pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, 1076 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1010 &pte_page, gfp); 1077 &pte_page, gfp);
1011 if (!pte) 1078 if (!pte)
1012 goto out_free; 1079 goto out_free;
@@ -1042,7 +1109,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
1042 for (i = dma_dom->aperture[index]->offset; 1109 for (i = dma_dom->aperture[index]->offset;
1043 i < dma_dom->aperture_size; 1110 i < dma_dom->aperture_size;
1044 i += PAGE_SIZE) { 1111 i += PAGE_SIZE) {
1045 u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); 1112 u64 *pte = fetch_pte(&dma_dom->domain, i);
1046 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 1113 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1047 continue; 1114 continue;
1048 1115
@@ -1712,7 +1779,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1712 1779
1713 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; 1780 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1714 if (!pte) { 1781 if (!pte) {
1715 pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, 1782 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1716 GFP_ATOMIC); 1783 GFP_ATOMIC);
1717 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; 1784 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1718 } else 1785 } else
@@ -2439,12 +2506,11 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
2439 return ret; 2506 return ret;
2440} 2507}
2441 2508
2442static int amd_iommu_map_range(struct iommu_domain *dom, 2509static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2443 unsigned long iova, phys_addr_t paddr, 2510 phys_addr_t paddr, int gfp_order, int iommu_prot)
2444 size_t size, int iommu_prot)
2445{ 2511{
2512 unsigned long page_size = 0x1000UL << gfp_order;
2446 struct protection_domain *domain = dom->priv; 2513 struct protection_domain *domain = dom->priv;
2447 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
2448 int prot = 0; 2514 int prot = 0;
2449 int ret; 2515 int ret;
2450 2516
@@ -2453,61 +2519,50 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
2453 if (iommu_prot & IOMMU_WRITE) 2519 if (iommu_prot & IOMMU_WRITE)
2454 prot |= IOMMU_PROT_IW; 2520 prot |= IOMMU_PROT_IW;
2455 2521
2456 iova &= PAGE_MASK;
2457 paddr &= PAGE_MASK;
2458
2459 mutex_lock(&domain->api_lock); 2522 mutex_lock(&domain->api_lock);
2460 2523 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2461 for (i = 0; i < npages; ++i) {
2462 ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
2463 if (ret)
2464 return ret;
2465
2466 iova += PAGE_SIZE;
2467 paddr += PAGE_SIZE;
2468 }
2469
2470 mutex_unlock(&domain->api_lock); 2524 mutex_unlock(&domain->api_lock);
2471 2525
2472 return 0; 2526 return ret;
2473} 2527}
2474 2528
2475static void amd_iommu_unmap_range(struct iommu_domain *dom, 2529static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2476 unsigned long iova, size_t size) 2530 int gfp_order)
2477{ 2531{
2478
2479 struct protection_domain *domain = dom->priv; 2532 struct protection_domain *domain = dom->priv;
2480 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); 2533 unsigned long page_size, unmap_size;
2481 2534
2482 iova &= PAGE_MASK; 2535 page_size = 0x1000UL << gfp_order;
2483 2536
2484 mutex_lock(&domain->api_lock); 2537 mutex_lock(&domain->api_lock);
2485 2538 unmap_size = iommu_unmap_page(domain, iova, page_size);
2486 for (i = 0; i < npages; ++i) { 2539 mutex_unlock(&domain->api_lock);
2487 iommu_unmap_page(domain, iova, PM_MAP_4k);
2488 iova += PAGE_SIZE;
2489 }
2490 2540
2491 iommu_flush_tlb_pde(domain); 2541 iommu_flush_tlb_pde(domain);
2492 2542
2493 mutex_unlock(&domain->api_lock); 2543 return get_order(unmap_size);
2494} 2544}
2495 2545
2496static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2546static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2497 unsigned long iova) 2547 unsigned long iova)
2498{ 2548{
2499 struct protection_domain *domain = dom->priv; 2549 struct protection_domain *domain = dom->priv;
2500 unsigned long offset = iova & ~PAGE_MASK; 2550 unsigned long offset_mask;
2501 phys_addr_t paddr; 2551 phys_addr_t paddr;
2502 u64 *pte; 2552 u64 *pte, __pte;
2503 2553
2504 pte = fetch_pte(domain, iova, PM_MAP_4k); 2554 pte = fetch_pte(domain, iova);
2505 2555
2506 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 2556 if (!pte || !IOMMU_PTE_PRESENT(*pte))
2507 return 0; 2557 return 0;
2508 2558
2509 paddr = *pte & IOMMU_PAGE_MASK; 2559 if (PM_PTE_LEVEL(*pte) == 0)
2510 paddr |= offset; 2560 offset_mask = PAGE_SIZE - 1;
2561 else
2562 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2563
2564 __pte = *pte & PM_ADDR_MASK;
2565 paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2511 2566
2512 return paddr; 2567 return paddr;
2513} 2568}
@@ -2523,8 +2578,8 @@ static struct iommu_ops amd_iommu_ops = {
2523 .domain_destroy = amd_iommu_domain_destroy, 2578 .domain_destroy = amd_iommu_domain_destroy,
2524 .attach_dev = amd_iommu_attach_device, 2579 .attach_dev = amd_iommu_attach_device,
2525 .detach_dev = amd_iommu_detach_device, 2580 .detach_dev = amd_iommu_detach_device,
2526 .map = amd_iommu_map_range, 2581 .map = amd_iommu_map,
2527 .unmap = amd_iommu_unmap_range, 2582 .unmap = amd_iommu_unmap,
2528 .iova_to_phys = amd_iommu_iova_to_phys, 2583 .iova_to_phys = amd_iommu_iova_to_phys,
2529 .domain_has_cap = amd_iommu_domain_has_cap, 2584 .domain_has_cap = amd_iommu_domain_has_cap,
2530}; 2585};
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 6360abf993d4..3bacb4d0844c 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -120,6 +120,7 @@ struct ivmd_header {
120bool amd_iommu_dump; 120bool amd_iommu_dump;
121 121
122static int __initdata amd_iommu_detected; 122static int __initdata amd_iommu_detected;
123static bool __initdata amd_iommu_disabled;
123 124
124u16 amd_iommu_last_bdf; /* largest PCI device id we have 125u16 amd_iommu_last_bdf; /* largest PCI device id we have
125 to handle */ 126 to handle */
@@ -1372,6 +1373,9 @@ void __init amd_iommu_detect(void)
1372 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 1373 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1373 return; 1374 return;
1374 1375
1376 if (amd_iommu_disabled)
1377 return;
1378
1375 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1379 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1376 iommu_detected = 1; 1380 iommu_detected = 1;
1377 amd_iommu_detected = 1; 1381 amd_iommu_detected = 1;
@@ -1401,6 +1405,8 @@ static int __init parse_amd_iommu_options(char *str)
1401 for (; *str; ++str) { 1405 for (; *str; ++str) {
1402 if (strncmp(str, "fullflush", 9) == 0) 1406 if (strncmp(str, "fullflush", 9) == 0)
1403 amd_iommu_unmap_flush = true; 1407 amd_iommu_unmap_flush = true;
1408 if (strncmp(str, "off", 3) == 0)
1409 amd_iommu_disabled = true;
1404 } 1410 }
1405 1411
1406 return 1; 1412 return 1;
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index 8ad4ffea6920..6e6b6a11b3ce 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -80,20 +80,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
80} 80}
81EXPORT_SYMBOL_GPL(iommu_detach_device); 81EXPORT_SYMBOL_GPL(iommu_detach_device);
82 82
83int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
84 phys_addr_t paddr, size_t size, int prot)
85{
86 return iommu_ops->map(domain, iova, paddr, size, prot);
87}
88EXPORT_SYMBOL_GPL(iommu_map_range);
89
90void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
91 size_t size)
92{
93 iommu_ops->unmap(domain, iova, size);
94}
95EXPORT_SYMBOL_GPL(iommu_unmap_range);
96
97phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 83phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
98 unsigned long iova) 84 unsigned long iova)
99{ 85{
@@ -107,3 +93,32 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
107 return iommu_ops->domain_has_cap(domain, cap); 93 return iommu_ops->domain_has_cap(domain, cap);
108} 94}
109EXPORT_SYMBOL_GPL(iommu_domain_has_cap); 95EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
96
97int iommu_map(struct iommu_domain *domain, unsigned long iova,
98 phys_addr_t paddr, int gfp_order, int prot)
99{
100 unsigned long invalid_mask;
101 size_t size;
102
103 size = 0x1000UL << gfp_order;
104 invalid_mask = size - 1;
105
106 BUG_ON((iova | paddr) & invalid_mask);
107
108 return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
109}
110EXPORT_SYMBOL_GPL(iommu_map);
111
112int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
113{
114 unsigned long invalid_mask;
115 size_t size;
116
117 size = 0x1000UL << gfp_order;
118 invalid_mask = size - 1;
119
120 BUG_ON(iova & invalid_mask);
121
122 return iommu_ops->unmap(domain, iova, gfp_order);
123}
124EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 417312528ddf..371dc564e2e4 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3626,14 +3626,15 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
3626 domain_remove_one_dev_info(dmar_domain, pdev); 3626 domain_remove_one_dev_info(dmar_domain, pdev);
3627} 3627}
3628 3628
3629static int intel_iommu_map_range(struct iommu_domain *domain, 3629static int intel_iommu_map(struct iommu_domain *domain,
3630 unsigned long iova, phys_addr_t hpa, 3630 unsigned long iova, phys_addr_t hpa,
3631 size_t size, int iommu_prot) 3631 int gfp_order, int iommu_prot)
3632{ 3632{
3633 struct dmar_domain *dmar_domain = domain->priv; 3633 struct dmar_domain *dmar_domain = domain->priv;
3634 u64 max_addr; 3634 u64 max_addr;
3635 int addr_width; 3635 int addr_width;
3636 int prot = 0; 3636 int prot = 0;
3637 size_t size;
3637 int ret; 3638 int ret;
3638 3639
3639 if (iommu_prot & IOMMU_READ) 3640 if (iommu_prot & IOMMU_READ)
@@ -3643,6 +3644,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3643 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3644 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3644 prot |= DMA_PTE_SNP; 3645 prot |= DMA_PTE_SNP;
3645 3646
3647 size = PAGE_SIZE << gfp_order;
3646 max_addr = iova + size; 3648 max_addr = iova + size;
3647 if (dmar_domain->max_addr < max_addr) { 3649 if (dmar_domain->max_addr < max_addr) {
3648 int min_agaw; 3650 int min_agaw;
@@ -3669,19 +3671,19 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3669 return ret; 3671 return ret;
3670} 3672}
3671 3673
3672static void intel_iommu_unmap_range(struct iommu_domain *domain, 3674static int intel_iommu_unmap(struct iommu_domain *domain,
3673 unsigned long iova, size_t size) 3675 unsigned long iova, int gfp_order)
3674{ 3676{
3675 struct dmar_domain *dmar_domain = domain->priv; 3677 struct dmar_domain *dmar_domain = domain->priv;
3676 3678 size_t size = PAGE_SIZE << gfp_order;
3677 if (!size)
3678 return;
3679 3679
3680 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, 3680 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3681 (iova + size - 1) >> VTD_PAGE_SHIFT); 3681 (iova + size - 1) >> VTD_PAGE_SHIFT);
3682 3682
3683 if (dmar_domain->max_addr == iova + size) 3683 if (dmar_domain->max_addr == iova + size)
3684 dmar_domain->max_addr = iova; 3684 dmar_domain->max_addr = iova;
3685
3686 return gfp_order;
3685} 3687}
3686 3688
3687static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3689static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3714,8 +3716,8 @@ static struct iommu_ops intel_iommu_ops = {
3714 .domain_destroy = intel_iommu_domain_destroy, 3716 .domain_destroy = intel_iommu_domain_destroy,
3715 .attach_dev = intel_iommu_attach_device, 3717 .attach_dev = intel_iommu_attach_device,
3716 .detach_dev = intel_iommu_detach_device, 3718 .detach_dev = intel_iommu_detach_device,
3717 .map = intel_iommu_map_range, 3719 .map = intel_iommu_map,
3718 .unmap = intel_iommu_unmap_range, 3720 .unmap = intel_iommu_unmap,
3719 .iova_to_phys = intel_iommu_iova_to_phys, 3721 .iova_to_phys = intel_iommu_iova_to_phys,
3720 .domain_has_cap = intel_iommu_domain_has_cap, 3722 .domain_has_cap = intel_iommu_domain_has_cap,
3721}; 3723};
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3af4ffd591b9..be22ad83689c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -37,9 +37,9 @@ struct iommu_ops {
37 int (*attach_dev)(struct iommu_domain *domain, struct device *dev); 37 int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
38 void (*detach_dev)(struct iommu_domain *domain, struct device *dev); 38 void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
39 int (*map)(struct iommu_domain *domain, unsigned long iova, 39 int (*map)(struct iommu_domain *domain, unsigned long iova,
40 phys_addr_t paddr, size_t size, int prot); 40 phys_addr_t paddr, int gfp_order, int prot);
41 void (*unmap)(struct iommu_domain *domain, unsigned long iova, 41 int (*unmap)(struct iommu_domain *domain, unsigned long iova,
42 size_t size); 42 int gfp_order);
43 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, 43 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
44 unsigned long iova); 44 unsigned long iova);
45 int (*domain_has_cap)(struct iommu_domain *domain, 45 int (*domain_has_cap)(struct iommu_domain *domain,
@@ -56,10 +56,10 @@ extern int iommu_attach_device(struct iommu_domain *domain,
56 struct device *dev); 56 struct device *dev);
57extern void iommu_detach_device(struct iommu_domain *domain, 57extern void iommu_detach_device(struct iommu_domain *domain,
58 struct device *dev); 58 struct device *dev);
59extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, 59extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
60 phys_addr_t paddr, size_t size, int prot); 60 phys_addr_t paddr, int gfp_order, int prot);
61extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, 61extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
62 size_t size); 62 int gfp_order);
63extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 63extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
64 unsigned long iova); 64 unsigned long iova);
65extern int iommu_domain_has_cap(struct iommu_domain *domain, 65extern int iommu_domain_has_cap(struct iommu_domain *domain,
@@ -96,16 +96,16 @@ static inline void iommu_detach_device(struct iommu_domain *domain,
96{ 96{
97} 97}
98 98
99static inline int iommu_map_range(struct iommu_domain *domain, 99static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
100 unsigned long iova, phys_addr_t paddr, 100 phys_addr_t paddr, int gfp_order, int prot)
101 size_t size, int prot)
102{ 101{
103 return -ENODEV; 102 return -ENODEV;
104} 103}
105 104
106static inline void iommu_unmap_range(struct iommu_domain *domain, 105static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
107 unsigned long iova, size_t size) 106 int gfp_order)
108{ 107{
108 return -ENODEV;
109} 109}
110 110
111static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 111static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 80fd3ad3b2de..11692b9e8830 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,12 +32,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
32static void kvm_iommu_put_pages(struct kvm *kvm, 32static void kvm_iommu_put_pages(struct kvm *kvm,
33 gfn_t base_gfn, unsigned long npages); 33 gfn_t base_gfn, unsigned long npages);
34 34
35static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
36 gfn_t gfn, unsigned long size)
37{
38 gfn_t end_gfn;
39 pfn_t pfn;
40
41 pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
42 end_gfn = gfn + (size >> PAGE_SHIFT);
43 gfn += 1;
44
45 if (is_error_pfn(pfn))
46 return pfn;
47
48 while (gfn < end_gfn)
49 gfn_to_pfn_memslot(kvm, slot, gfn++);
50
51 return pfn;
52}
53
35int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) 54int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
36{ 55{
37 gfn_t gfn = slot->base_gfn; 56 gfn_t gfn, end_gfn;
38 unsigned long npages = slot->npages;
39 pfn_t pfn; 57 pfn_t pfn;
40 int i, r = 0; 58 int r = 0;
41 struct iommu_domain *domain = kvm->arch.iommu_domain; 59 struct iommu_domain *domain = kvm->arch.iommu_domain;
42 int flags; 60 int flags;
43 61
@@ -45,31 +63,62 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
45 if (!domain) 63 if (!domain)
46 return 0; 64 return 0;
47 65
66 gfn = slot->base_gfn;
67 end_gfn = gfn + slot->npages;
68
48 flags = IOMMU_READ | IOMMU_WRITE; 69 flags = IOMMU_READ | IOMMU_WRITE;
49 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) 70 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
50 flags |= IOMMU_CACHE; 71 flags |= IOMMU_CACHE;
51 72
52 for (i = 0; i < npages; i++) { 73
53 /* check if already mapped */ 74 while (gfn < end_gfn) {
54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 75 unsigned long page_size;
76
77 /* Check if already mapped */
78 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
79 gfn += 1;
80 continue;
81 }
82
83 /* Get the page size we could use to map */
84 page_size = kvm_host_page_size(kvm, gfn);
85
86 /* Make sure the page_size does not exceed the memslot */
87 while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
88 page_size >>= 1;
89
90 /* Make sure gfn is aligned to the page size we want to map */
91 while ((gfn << PAGE_SHIFT) & (page_size - 1))
92 page_size >>= 1;
93
94 /*
95 * Pin all pages we are about to map in memory. This is
96 * important because we unmap and unpin in 4kb steps later.
97 */
98 pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
99 if (is_error_pfn(pfn)) {
100 gfn += 1;
55 continue; 101 continue;
102 }
56 103
57 pfn = gfn_to_pfn_memslot(kvm, slot, gfn); 104 /* Map into IO address space */
58 r = iommu_map_range(domain, 105 r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
59 gfn_to_gpa(gfn), 106 get_order(page_size), flags);
60 pfn_to_hpa(pfn),
61 PAGE_SIZE, flags);
62 if (r) { 107 if (r) {
63 printk(KERN_ERR "kvm_iommu_map_address:" 108 printk(KERN_ERR "kvm_iommu_map_address:"
64 "iommu failed to map pfn=%lx\n", pfn); 109 "iommu failed to map pfn=%lx\n", pfn);
65 goto unmap_pages; 110 goto unmap_pages;
66 } 111 }
67 gfn++; 112
113 gfn += page_size >> PAGE_SHIFT;
114
115
68 } 116 }
117
69 return 0; 118 return 0;
70 119
71unmap_pages: 120unmap_pages:
72 kvm_iommu_put_pages(kvm, slot->base_gfn, i); 121 kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
73 return r; 122 return r;
74} 123}
75 124
@@ -189,27 +238,47 @@ out_unmap:
189 return r; 238 return r;
190} 239}
191 240
241static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
242{
243 unsigned long i;
244
245 for (i = 0; i < npages; ++i)
246 kvm_release_pfn_clean(pfn + i);
247}
248
192static void kvm_iommu_put_pages(struct kvm *kvm, 249static void kvm_iommu_put_pages(struct kvm *kvm,
193 gfn_t base_gfn, unsigned long npages) 250 gfn_t base_gfn, unsigned long npages)
194{ 251{
195 gfn_t gfn = base_gfn; 252 struct iommu_domain *domain;
253 gfn_t end_gfn, gfn;
196 pfn_t pfn; 254 pfn_t pfn;
197 struct iommu_domain *domain = kvm->arch.iommu_domain;
198 unsigned long i;
199 u64 phys; 255 u64 phys;
200 256
257 domain = kvm->arch.iommu_domain;
258 end_gfn = base_gfn + npages;
259 gfn = base_gfn;
260
201 /* check if iommu exists and in use */ 261 /* check if iommu exists and in use */
202 if (!domain) 262 if (!domain)
203 return; 263 return;
204 264
205 for (i = 0; i < npages; i++) { 265 while (gfn < end_gfn) {
266 unsigned long unmap_pages;
267 int order;
268
269 /* Get physical address */
206 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); 270 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
207 pfn = phys >> PAGE_SHIFT; 271 pfn = phys >> PAGE_SHIFT;
208 kvm_release_pfn_clean(pfn); 272
209 gfn++; 273 /* Unmap address from IO address space */
210 } 274 order = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
275 unmap_pages = 1ULL << order;
211 276
212 iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); 277 /* Unpin all pages we just unmapped to not leak any memory */
278 kvm_unpin_pages(kvm, pfn, unmap_pages);
279
280 gfn += unmap_pages;
281 }
213} 282}
214 283
215static int kvm_iommu_unmap_memslots(struct kvm *kvm) 284static int kvm_iommu_unmap_memslots(struct kvm *kvm)