-rw-r--r--  arch/x86/Kconfig           |  15
-rw-r--r--  arch/x86/kernel/pci-dma.c  |   2
-rw-r--r--  drivers/pci/intel-iommu.c  | 695
3 files changed, 353 insertions, 359 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d1430ef6b4f9..c07f72205909 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON
 	  recommended you say N here while the DMAR code remains
 	  experimental.
 
-config DMAR_GFX_WA
-	def_bool y
-	prompt "Support for Graphics workaround"
-	depends on DMAR
-	---help---
-	  Current Graphics drivers tend to use physical address
-	  for DMA and avoid using DMA APIs. Setting this config
-	  option permits the IOMMU driver to set a unity map for
-	  all the OS-visible memory. Hence the driver can continue
-	  to use physical addresses for DMA.
-
 config DMAR_FLOPPY_WA
 	def_bool y
 	depends on DMAR
 	---help---
-	  Floppy disk drivers are know to bypass DMA API calls
+	  Floppy disk drivers are known to bypass DMA API calls
 	  thereby failing to work when IOMMU is enabled. This
 	  workaround will setup a 1:1 mapping for the first
-	  16M to make floppy (an ISA device) work.
+	  16MiB to make floppy (an ISA device) work.
 
 config INTR_REMAP
 	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 47630479b067..1a041bcf506b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -211,11 +211,11 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
 	if (!strncmp(p, "soft", 4))
 		swiotlb = 1;
+#endif
 	if (!strncmp(p, "pt", 2)) {
 		iommu_pass_through = 1;
 		return 1;
 	}
-#endif
 
 	gart_parse_options(p);
 
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 420afa887283..53075424a434 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -56,14 +56,32 @@
 #define MAX_AGAW_WIDTH 64
 
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+#define DOMAIN_MAX_PFN(gaw)  ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
 
-#ifndef PHYSICAL_PAGE_MASK
-#define PHYSICAL_PAGE_MASK PAGE_MASK
-#endif
+
+/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
+   are never going to work. */
+static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
+{
+	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+
+static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
+{
+	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+static inline unsigned long page_to_dma_pfn(struct page *pg)
+{
+	return mm_to_dma_pfn(page_to_pfn(pg));
+}
+static inline unsigned long virt_to_dma_pfn(void *p)
+{
+	return page_to_dma_pfn(virt_to_page(p));
+}
 
 /* global iommu list, set NULL for ignored DMAR units */
 static struct intel_iommu **g_iommus;
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
 
 static inline u64 dma_pte_addr(struct dma_pte *pte)
 {
-	return (pte->val & VTD_PAGE_MASK);
+#ifdef CONFIG_64BIT
+	return pte->val & VTD_PAGE_MASK;
+#else
+	/* Must have a full atomic 64-bit read */
+	return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
+#endif
 }
 
-static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
+static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
 {
-	pte->val |= (addr & VTD_PAGE_MASK);
+	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
 }
 
 static inline bool dma_pte_present(struct dma_pte *pte)
@@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }
 
+static inline int first_pte_in_page(struct dma_pte *pte)
+{
+	return !((unsigned long)pte & ~VTD_PAGE_MASK);
+}
+
 /*
  * This domain is a statically identity mapping domain.
  *	1. This domain creats a static 1:1 mapping to all usable memory.
@@ -244,7 +272,6 @@ struct dmar_domain {
 	struct iova_domain iovad;	/* iova's that belong to this domain */
 
 	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
 	int		gaw;		/* max guest address width */
 
 	/* adjusted guest address width, 0 is level 2 30-bit */
@@ -648,80 +675,78 @@ static inline int width_to_agaw(int width)
 
 static inline unsigned int level_to_offset_bits(int level)
 {
-	return (12 + (level - 1) * LEVEL_STRIDE);
+	return (level - 1) * LEVEL_STRIDE;
 }
 
-static inline int address_level_offset(u64 addr, int level)
+static inline int pfn_level_offset(unsigned long pfn, int level)
 {
-	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
+	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 }
 
-static inline u64 level_mask(int level)
+static inline unsigned long level_mask(int level)
 {
-	return ((u64)-1 << level_to_offset_bits(level));
+	return -1UL << level_to_offset_bits(level);
 }
 
-static inline u64 level_size(int level)
+static inline unsigned long level_size(int level)
 {
-	return ((u64)1 << level_to_offset_bits(level));
+	return 1UL << level_to_offset_bits(level);
 }
 
-static inline u64 align_to_level(u64 addr, int level)
+static inline unsigned long align_to_level(unsigned long pfn, int level)
 {
-	return ((addr + level_size(level) - 1) & level_mask(level));
+	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
-static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
+static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
+				      unsigned long pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	struct dma_pte *parent, *pte = NULL;
 	int level = agaw_to_level(domain->agaw);
 	int offset;
-	unsigned long flags;
 
 	BUG_ON(!domain->pgd);
-
-	addr &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 	parent = domain->pgd;
 
-	spin_lock_irqsave(&domain->mapping_lock, flags);
 	while (level > 0) {
 		void *tmp_page;
 
-		offset = address_level_offset(addr, level);
+		offset = pfn_level_offset(pfn, level);
 		pte = &parent[offset];
 		if (level == 1)
 			break;
 
 		if (!dma_pte_present(pte)) {
+			uint64_t pteval;
+
 			tmp_page = alloc_pgtable_page();
 
-			if (!tmp_page) {
-				spin_unlock_irqrestore(&domain->mapping_lock,
-						       flags);
+			if (!tmp_page)
 				return NULL;
+
+			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
+			pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
+				/* Someone else set it while we were thinking; use theirs. */
+				free_pgtable_page(tmp_page);
+			} else {
+				dma_pte_addr(pte);
+				domain_flush_cache(domain, pte, sizeof(*pte));
 			}
-			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
-			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
-			/*
-			 * high level table always sets r/w, last level page
-			 * table control read/write
-			 */
-			dma_set_pte_readable(pte);
-			dma_set_pte_writable(pte);
-			domain_flush_cache(domain, pte, sizeof(*pte));
 		}
 		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
 	}
 
-	spin_unlock_irqrestore(&domain->mapping_lock, flags);
 	return pte;
 }
 
 /* return address's pte at specific level */
-static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
-					 int level)
+static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
+					 unsigned long pfn,
+					 int level)
 {
 	struct dma_pte *parent, *pte = NULL;
 	int total = agaw_to_level(domain->agaw);
@@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 
 	parent = domain->pgd;
 	while (level <= total) {
-		offset = address_level_offset(addr, total);
+		offset = pfn_level_offset(pfn, total);
 		pte = &parent[offset];
 		if (level == total)
 			return pte;
@@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 	return NULL;
 }
 
-/* clear one page's page table */
-static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
-{
-	struct dma_pte *pte = NULL;
-
-	/* get last level pte */
-	pte = dma_addr_level_pte(domain, addr, 1);
-
-	if (pte) {
-		dma_clear_pte(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-	}
-}
-
 /* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
+static void dma_pte_clear_range(struct dmar_domain *domain,
+				unsigned long start_pfn,
+				unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	int npages;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
+
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 
-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
-	/* in case it's partial page */
-	start &= PAGE_MASK;
-	end = PAGE_ALIGN(end);
-	npages = (end - start) / VTD_PAGE_SIZE;
+	/* we don't need lock here; nobody else touches the iova range */
+	while (start_pfn <= last_pfn) {
+		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+		if (!pte) {
+			start_pfn = align_to_level(start_pfn + 1, 2);
+			continue;
+		}
+		do {
+			dma_clear_pte(pte);
+			start_pfn++;
+			pte++;
+		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 
-	/* we don't need lock here, nobody else touches the iova range */
-	while (npages--) {
-		dma_pte_clear_one(domain, start);
-		start += VTD_PAGE_SIZE;
+		domain_flush_cache(domain, first_pte,
+				   (void *)pte - (void *)first_pte);
 	}
 }
 
 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
-				   u64 start, u64 end)
+				   unsigned long start_pfn,
+				   unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	struct dma_pte *pte;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
 	int total = agaw_to_level(domain->agaw);
 	int level;
-	u64 tmp;
+	unsigned long tmp;
 
-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 
-	/* we don't need lock here, nobody else touches the iova range */
+	/* We don't need lock here; nobody else touches the iova range */
 	level = 2;
 	while (level <= total) {
-		tmp = align_to_level(start, level);
-		if (tmp >= end || (tmp + level_size(level) > end))
+		tmp = align_to_level(start_pfn, level);
+
+		/* If we can't even clear one PTE at this level, we're done */
+		if (tmp + level_size(level) - 1 > last_pfn)
 			return;
 
-		while (tmp < end) {
-			pte = dma_addr_level_pte(domain, tmp, level);
-			if (pte) {
-				free_pgtable_page(
-					phys_to_virt(dma_pte_addr(pte)));
-				dma_clear_pte(pte);
-				domain_flush_cache(domain, pte, sizeof(*pte));
+		while (tmp + level_size(level) - 1 <= last_pfn) {
+			first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+			if (!pte) {
+				tmp = align_to_level(tmp + 1, level + 1);
+				continue;
 			}
-			tmp += level_size(level);
+			do {
+				if (dma_pte_present(pte)) {
+					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
+					dma_clear_pte(pte);
+				}
+				pte++;
+				tmp += level_size(level);
+			} while (!first_pte_in_page(pte) &&
+				 tmp + level_size(level) - 1 <= last_pfn);
+
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+
 		}
 		level++;
 	}
 	/* free pgd */
-	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
+	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		free_pgtable_page(domain->pgd);
 		domain->pgd = NULL;
 	}
@@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
-				  u64 addr, unsigned int pages)
+				  unsigned long pfn, unsigned int pages)
 {
 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
 
-	BUG_ON(addr & (~VTD_PAGE_MASK));
 	BUG_ON(pages == 0);
 
 	/*
@@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 	else
 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
 						DMA_TLB_PSI_FLUSH);
-	if (did)
+
+	/*
+	 * In caching mode, domain ID 0 is reserved for non-present to present
+	 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
+	 */
+	if (!cap_caching_mode(iommu->cap) || did)
 		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
 }
1060 1098
@@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
 	struct pci_dev *pdev = NULL;
 	struct iova *iova;
 	int i;
-	u64 addr, size;
 
 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
 
@@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
 			r = &pdev->resource[i];
 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
 				continue;
-			addr = r->start;
-			addr &= PHYSICAL_PAGE_MASK;
-			size = r->end - addr;
-			size = PAGE_ALIGN(size);
-			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
-				IOVA_PFN(size + addr) - 1);
+			iova = reserve_iova(&reserved_iova_list,
+					    IOVA_PFN(r->start),
+					    IOVA_PFN(r->end));
 			if (!iova)
 				printk(KERN_ERR "Reserve iova failed\n");
 		}
@@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	unsigned long sagaw;
 
 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-	spin_lock_init(&domain->mapping_lock);
 	spin_lock_init(&domain->iommu_lock);
 
 	domain_reserve_special_ranges(domain);
@@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
-	u64 end;
 
 	/* Domain 0 is reserved, so dont process it */
 	if (!domain)
@@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
 	domain_remove_dev_info(domain);
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~PAGE_MASK);
 
 	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
+	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
+	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 	for_each_active_iommu(iommu, drhd)
 		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
 					     tmp->devfn);
 }
 
-static int
-domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
-			u64 hpa, size_t size, int prot)
+static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+			    struct scatterlist *sg, unsigned long phys_pfn,
+			    unsigned long nr_pages, int prot)
 {
-	u64 start_pfn, end_pfn;
-	struct dma_pte *pte;
-	int index;
-	int addr_width = agaw_to_width(domain->agaw);
+	struct dma_pte *first_pte = NULL, *pte = NULL;
+	phys_addr_t uninitialized_var(pteval);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	unsigned long sg_res;
 
-	hpa &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
 
 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
 		return -EINVAL;
-	iova &= PAGE_MASK;
-	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
-	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
-	index = 0;
-	while (start_pfn < end_pfn) {
-		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
-		if (!pte)
-			return -ENOMEM;
+
+	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+
+	if (sg)
+		sg_res = 0;
+	else {
+		sg_res = nr_pages + 1;
+		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+	}
+
+	while (nr_pages--) {
+		uint64_t tmp;
+
+		if (!sg_res) {
+			sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
+			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
+			sg->dma_length = sg->length;
+			pteval = page_to_phys(sg_page(sg)) | prot;
+		}
+		if (!pte) {
+			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+			if (!pte)
+				return -ENOMEM;
+		}
 		/* We don't need lock here, nobody else
 		 * touches the iova range
 		 */
-		BUG_ON(dma_pte_addr(pte));
-		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
-		dma_set_pte_prot(pte, prot);
-		if (prot & DMA_PTE_SNP)
-			dma_set_pte_snp(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-		start_pfn++;
-		index++;
+		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
+		if (tmp) {
+			static int dumps = 5;
+			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
+			       iov_pfn, tmp, (unsigned long long)pteval);
+			if (dumps) {
+				dumps--;
+				debug_dma_dump_mappings(NULL);
+			}
+			WARN_ON(1);
+		}
+		pte++;
+		if (!nr_pages || first_pte_in_page(pte)) {
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+			pte = NULL;
+		}
+		iov_pfn++;
+		pteval += VTD_PAGE_SIZE;
+		sg_res--;
+		if (!sg_res)
+			sg = sg_next(sg);
 	}
 	return 0;
 }
 
+static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+				    struct scatterlist *sg, unsigned long nr_pages,
+				    int prot)
+{
+	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
+}
+
+static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+				     unsigned long phys_pfn, unsigned long nr_pages,
+				     int prot)
+{
+	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
+}
+
 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
 	if (!iommu)
@@ -1844,58 +1918,61 @@ error:
 
 static int iommu_identity_mapping;
 
+static int iommu_domain_identity_map(struct dmar_domain *domain,
+				     unsigned long long start,
+				     unsigned long long end)
+{
+	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
+	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
+
+	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
+			  dma_to_mm_pfn(last_vpfn))) {
+		printk(KERN_ERR "IOMMU: reserve iova failed\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
+		 start, end, domain->id);
+	/*
+	 * RMRR range might have overlap with physical memory range,
+	 * clear it first
+	 */
+	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
+
+	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
+				  last_vpfn - first_vpfn + 1,
+				  DMA_PTE_READ|DMA_PTE_WRITE);
+}
+
 static int iommu_prepare_identity_map(struct pci_dev *pdev,
 				      unsigned long long start,
 				      unsigned long long end)
 {
 	struct dmar_domain *domain;
-	unsigned long size;
-	unsigned long long base;
 	int ret;
 
 	printk(KERN_INFO
 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
 	       pci_name(pdev), start, end);
-	if (iommu_identity_mapping)
-		domain = si_domain;
-	else
-		/* page table init */
-		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+
+	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain)
 		return -ENOMEM;
 
-	/* The address might not be aligned */
-	base = start & PAGE_MASK;
-	size = end - base;
-	size = PAGE_ALIGN(size);
-	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
-			IOVA_PFN(base + size) - 1)) {
-		printk(KERN_ERR "IOMMU: reserve iova failed\n");
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	pr_debug("Mapping reserved region %lx@%llx for %s\n",
-		size, base, pci_name(pdev));
-	/*
-	 * RMRR range might have overlap with physical memory range,
-	 * clear it first
-	 */
-	dma_pte_clear_range(domain, base, base + size);
-
-	ret = domain_page_mapping(domain, base, base, size,
-			DMA_PTE_READ|DMA_PTE_WRITE);
+	ret = iommu_domain_identity_map(domain, start, end);
 	if (ret)
 		goto error;
 
 	/* context entry init */
 	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
-	if (!ret)
-		return 0;
-error:
+	if (ret)
+		goto error;
+
+	return 0;
+
+ error:
 	domain_exit(domain);
 	return ret;
-
 }
 
 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
 					 rmrr->end_address + 1);
 }
 
-struct iommu_prepare_data {
-	struct pci_dev *pdev;
-	int ret;
-};
-
-static int __init iommu_prepare_work_fn(unsigned long start_pfn,
-					 unsigned long end_pfn, void *datax)
-{
-	struct iommu_prepare_data *data;
-
-	data = (struct iommu_prepare_data *)datax;
-
-	data->ret = iommu_prepare_identity_map(data->pdev,
-				start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	return data->ret;
-
-}
-
-static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
-{
-	int nid;
-	struct iommu_prepare_data data;
-
-	data.pdev = pdev;
-	data.ret = 0;
-
-	for_each_online_node(nid) {
-		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
-		if (data.ret)
-			return data.ret;
-	}
-	return data.ret;
-}
-
-#ifdef CONFIG_DMAR_GFX_WA
-static void __init iommu_prepare_gfx_mapping(void)
-{
-	struct pci_dev *pdev = NULL;
-	int ret;
-
-	for_each_pci_dev(pdev) {
-		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
-		    !IS_GFX_DEVICE(pdev))
-			continue;
-		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
-		       pci_name(pdev));
-		ret = iommu_prepare_with_active_regions(pdev);
-		if (ret)
-			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
-	}
-}
-#else /* !CONFIG_DMAR_GFX_WA */
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif
-
 #ifdef CONFIG_DMAR_FLOPPY_WA
 static inline void iommu_prepare_isa(void)
 {
@@ -1975,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
 	if (!pdev)
 		return;
 
-	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
+	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
 
 	if (ret)
-		printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
+		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
 		       "floppy might not work\n");
 
 }
 #else
@@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void)
 }
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
+
+static int __init si_domain_work_fn(unsigned long start_pfn,
+				    unsigned long end_pfn, void *datax)
+{
+	int *ret = datax;
+
+	*ret = iommu_domain_identity_map(si_domain,
+					 (uint64_t)start_pfn << PAGE_SHIFT,
+					 (uint64_t)end_pfn << PAGE_SHIFT);
+	return *ret;
+
+}
+
 static int si_domain_init(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
-	int ret = 0;
+	int nid, ret = 0;
 
 	si_domain = alloc_domain();
 	if (!si_domain)
 		return -EFAULT;
 
+	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
 
 	for_each_active_iommu(iommu, drhd) {
 		ret = iommu_attach_domain(si_domain, iommu);
@@ -2034,6 +2067,12 @@ static int si_domain_init(void)
 
 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
 
+	for_each_online_node(nid) {
+		work_with_active_regions(nid, si_domain_work_fn, &ret);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -2087,13 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void)
 	if (ret)
 		return -EFAULT;
 
-	printk(KERN_INFO "IOMMU: Setting identity map:\n");
 	for_each_pci_dev(pdev) {
-		ret = iommu_prepare_with_active_regions(pdev);
-		if (ret) {
-			printk(KERN_INFO "1:1 mapping to one domain failed.\n");
-			return -EFAULT;
-		}
+		printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
+		       pci_name(pdev));
+
+		ret = domain_context_mapping(si_domain, pdev,
+					     CONTEXT_TT_MULTI_LEVEL);
+		if (ret)
+			return ret;
 		ret = domain_add_dev_info(si_domain, pdev);
 		if (ret)
 			return ret;
@@ -2284,8 +2324,6 @@ int __init init_dmars(void)
 			}
 		}
 
-		iommu_prepare_gfx_mapping();
-
 		iommu_prepare_isa();
 	}
 
@@ -2330,50 +2368,40 @@ error:
 	return ret;
 }
 
-static inline u64 aligned_size(u64 host_addr, size_t size)
-{
-	u64 addr;
-	addr = (host_addr & (~PAGE_MASK)) + size;
-	return PAGE_ALIGN(addr);
-}
-
-struct iova *
-iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
+static inline unsigned long aligned_nrpages(unsigned long host_addr,
+					    size_t size)
 {
-	struct iova *piova;
+	host_addr &= ~PAGE_MASK;
+	host_addr += size + PAGE_SIZE - 1;
 
-	/* Make sure it's in range */
-	end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
-	if (!size || (IOVA_START_ADDR + size > end))
-		return NULL;
-
-	piova = alloc_iova(&domain->iovad,
-			size >> PAGE_SHIFT, IOVA_PFN(end), 1);
-	return piova;
+	return host_addr >> VTD_PAGE_SHIFT;
 }
 
-static struct iova *
-__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
-		   size_t size, u64 dma_mask)
+static struct iova *intel_alloc_iova(struct device *dev,
+				     struct dmar_domain *domain,
+				     unsigned long nrpages, uint64_t dma_mask)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct iova *iova = NULL;
 
-	if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac)
-		iova = iommu_alloc_iova(domain, size, dma_mask);
-	else {
+	/* Restrict dma_mask to the width that the iommu can handle */
+	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
+
+	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
 		/*
 		 * First try to allocate an io virtual address in
 		 * DMA_BIT_MASK(32) and if that fails then try allocating
 		 * from higher range
 		 */
-		iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32));
-		if (!iova)
-			iova = iommu_alloc_iova(domain, size, dma_mask);
-	}
-
-	if (!iova) {
-		printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
+		iova = alloc_iova(&domain->iovad, nrpages,
+				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
+		if (iova)
+			return iova;
+	}
+	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
+	if (unlikely(!iova)) {
+		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
+		       nrpages, pci_name(pdev));
 		return NULL;
 	}
 
@@ -2476,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 		return 0;
 
 	iommu = domain_get_iommu(domain);
-	size = aligned_size((u64)paddr, size);
+	size = aligned_nrpages(paddr, size);
 
-	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
 	if (!iova)
 		goto error;
 
-	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
-
 	/*
 	 * Check if DMAR supports zero-length reads on write only
 	 * mappings..
@@ -2499,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	 * might have two guest_addr mapping to the same host paddr, but this
 	 * is not a big problem
 	 */
-	ret = domain_page_mapping(domain, start_paddr,
-		((u64)paddr) & PHYSICAL_PAGE_MASK,
-		size, prot);
+	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
+				 paddr >> VTD_PAGE_SHIFT, size, prot);
 	if (ret)
 		goto error;
 
 	/* it's a non-present to present mapping. Only flush if caching mode */
 	if (cap_caching_mode(iommu->cap))
-		iommu_flush_iotlb_psi(iommu, 0, start_paddr,
-			size >> VTD_PAGE_SHIFT);
+		iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
 	else
 		iommu_flush_write_buffer(iommu);
 
-	return start_paddr + ((u64)paddr & (~PAGE_MASK));
+	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
+	start_paddr += paddr & ~PAGE_MASK;
+	return start_paddr;
 
 error:
 	if (iova)
@@ -2605,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct dmar_domain *domain;
-	unsigned long start_addr;
+	unsigned long start_pfn, last_pfn;
 	struct iova *iova;
 	struct intel_iommu *iommu;
 
@@ -2618,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 	iommu = domain_get_iommu(domain);
 
 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
-	if (!iova)
+	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
+		      (unsigned long long)dev_addr))
 		return;
 
-	start_addr = iova->pfn_lo << PAGE_SHIFT;
-	size = aligned_size((u64)dev_addr, size);
+	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
+	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
 
-	pr_debug("Device %s unmapping: %zx@%llx\n",
-		pci_name(pdev), size, (unsigned long long)start_addr);
+	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
+		 pci_name(pdev), start_pfn, last_pfn);
 
 	/* clear the whole page */
-	dma_pte_clear_range(domain, start_addr, start_addr + size);
+	dma_pte_clear_range(domain, start_pfn, last_pfn);
+
 	/* free page tables */
-	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
+	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
+
 	if (intel_iommu_strict) {
-		iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
-			size >> VTD_PAGE_SHIFT);
+		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
+				      last_pfn - start_pfn + 1);
 		/* free iova */
 		__free_iova(&domain->iovad, iova);
 	} else {
@@ -2691,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 			   int nelems, enum dma_data_direction dir,
 			   struct dma_attrs *attrs)
 {
-	int i;
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct dmar_domain *domain;
-	unsigned long start_addr;
+	unsigned long start_pfn, last_pfn;
 	struct iova *iova;
-	size_t size = 0;
-	phys_addr_t addr;
-	struct scatterlist *sg;
 	struct intel_iommu *iommu;
 
 	if (iommu_no_mapping(pdev))
@@ -2710,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	iommu = domain_get_iommu(domain);
 
 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
-	if (!iova)
+	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
+		      (unsigned long long)sglist[0].dma_address))
 		return;
-	for_each_sg(sglist, sg, nelems, i) {
-		addr = page_to_phys(sg_page(sg)) + sg->offset;
-		size += aligned_size((u64)addr, sg->length);
-	}
 
-	start_addr = iova->pfn_lo << PAGE_SHIFT;
+	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
+	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
 
 	/* clear the whole page */
-	dma_pte_clear_range(domain, start_addr, start_addr + size);
+	dma_pte_clear_range(domain, start_pfn, last_pfn);
+
 	/* free page tables */
-	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
+	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
 
-	iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
-		size >> VTD_PAGE_SHIFT);
+	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
+			      (last_pfn - start_pfn + 1));
 
 	/* free iova */
 	__free_iova(&domain->iovad, iova);
@@ -2748,17 +2772,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 			enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	phys_addr_t addr;
 	int i;
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct dmar_domain *domain;
 	size_t size = 0;
 	int prot = 0;
-	size_t offset = 0;
+	size_t offset_pfn = 0;
 	struct iova *iova = NULL;
 	int ret;
 	struct scatterlist *sg;
-	unsigned long start_addr;
+	unsigned long start_vpfn;
 	struct intel_iommu *iommu;
 
 	BUG_ON(dir == DMA_NONE);
@@ -2771,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 
 	iommu = domain_get_iommu(domain);
 
-	for_each_sg(sglist, sg, nelems, i) {
-		addr = page_to_phys(sg_page(sg)) + sg->offset;
-		size += aligned_size((u64)addr, sg->length);
-	}
+	for_each_sg(sglist, sg, nelems, i)
+		size += aligned_nrpages(sg->offset, sg->length);
 
-	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
 	if (!iova) {
 		sglist->dma_length = 0;
 		return 0;
@@ -2792,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
 		prot |= DMA_PTE_WRITE;
 
-	start_addr = iova->pfn_lo << PAGE_SHIFT;
-	offset = 0;
-	for_each_sg(sglist, sg, nelems, i) {
-		addr = page_to_phys(sg_page(sg)) + sg->offset;
-		size = aligned_size((u64)addr, sg->length);
-		ret = domain_page_mapping(domain, start_addr + offset,
-			((u64)addr) & PHYSICAL_PAGE_MASK,
-			size, prot);
-		if (ret) {
-			/* clear the page */
-			dma_pte_clear_range(domain, start_addr,
-				start_addr + offset);
-			/* free page tables */
-			dma_pte_free_pagetable(domain, start_addr,
-				start_addr + offset);
-			/* free iova */
-			__free_iova(&domain->iovad, iova);
-			return 0;
-		}
-		sg->dma_address = start_addr + offset +
-			((u64)addr & (~PAGE_MASK));
-		sg->dma_length = sg->length;
-		offset += size;
+	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
+
+	ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
+	if (unlikely(ret)) {
+		/* clear the page */
+		dma_pte_clear_range(domain, start_vpfn,
+				    start_vpfn + size - 1);
+		/* free page tables */
+		dma_pte_free_pagetable(domain, start_vpfn,
+				       start_vpfn + size - 1);
+		/* free iova */
+		__free_iova(&domain->iovad, iova);
+		return 0;
 	}
 
 	/* it's a non-present to present mapping. Only flush if caching mode */
 	if (cap_caching_mode(iommu->cap))
-		iommu_flush_iotlb_psi(iommu, 0, start_addr,
-			offset >> VTD_PAGE_SHIFT);
+		iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
 	else
 		iommu_flush_write_buffer(iommu);
 
@@ -3325,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	int adjust_width;
 
 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-	spin_lock_init(&domain->mapping_lock);
 	spin_lock_init(&domain->iommu_lock);
 
 	domain_reserve_special_ranges(domain);
@@ -3379,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
 
 static void vm_domain_exit(struct dmar_domain *domain)
 {
-	u64 end;
-
 	/* Domain 0 is reserved, so dont process it */
 	if (!domain)
 		return;
@@ -3388,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
 	vm_domain_remove_all_dev_info(domain);
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~VTD_PAGE_MASK);
 
 	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
+	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
+	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 	iommu_free_vm_domain(domain);
 	free_domain_mem(domain);
@@ -3504,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
 		prot |= DMA_PTE_SNP;
 
-	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
+	max_addr = iova + size;
 	if (dmar_domain->max_addr < max_addr) {
 		int min_agaw;
 		u64 end;
@@ -3522,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
 		}
 		dmar_domain->max_addr = max_addr;
 	}
-
-	ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
+	/* Round up size to next multiple of PAGE_SIZE, if it and
+	   the low bits of hpa would take us onto the next page */
+	size = aligned_nrpages(hpa, size);
+	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
+				 hpa >> VTD_PAGE_SHIFT, size, prot);
 	return ret;
 }
 
@@ -3531,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
 				    unsigned long iova, size_t size)
 {
 	struct dmar_domain *dmar_domain = domain->priv;
-	dma_addr_t base;
 
-	/* The address might not be aligned */
-	base = iova & VTD_PAGE_MASK;
-	size = VTD_PAGE_ALIGN(size);
-	dma_pte_clear_range(dmar_domain, base, base + size);
+	dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
+			    (iova + size - 1) >> VTD_PAGE_SHIFT);
 
-	if (dmar_domain->max_addr == base + size)
-		dmar_domain->max_addr = base;
+	if (dmar_domain->max_addr == iova + size)
+		dmar_domain->max_addr = iova;
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3549,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	struct dma_pte *pte;
 	u64 phys = 0;
 
-	pte = addr_to_dma_pte(dmar_domain, iova);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
 	if (pte)
 		phys = dma_pte_addr(pte);
 