Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--  drivers/pci/intel-iommu.c | 695
1 file changed, 350 insertions(+), 345 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 420afa887283..53075424a434 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -56,14 +56,32 @@
56#define MAX_AGAW_WIDTH 64 56#define MAX_AGAW_WIDTH 64
57 57
58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
59#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
59 60
60#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
61#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
62#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
63 64
64#ifndef PHYSICAL_PAGE_MASK 65
65#define PHYSICAL_PAGE_MASK PAGE_MASK 66/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
66#endif 67 are never going to work. */
68static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
69{
70 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
71}
72
73static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
74{
75 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
76}
77static inline unsigned long page_to_dma_pfn(struct page *pg)
78{
79 return mm_to_dma_pfn(page_to_pfn(pg));
80}
81static inline unsigned long virt_to_dma_pfn(void *p)
82{
83 return page_to_dma_pfn(virt_to_page(p));
84}
67 85
68/* global iommu list, set NULL for ignored DMAR units */ 86/* global iommu list, set NULL for ignored DMAR units */
69static struct intel_iommu **g_iommus; 87static struct intel_iommu **g_iommus;
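The new dma_to_mm_pfn()/mm_to_dma_pfn() helpers convert between MM page frame numbers and VT-d (4KiB) page frame numbers by shifting by (PAGE_SHIFT - VTD_PAGE_SHIFT), which is why the comment insists VT-d pages are never larger than MM pages. A minimal userspace sketch of the round trip, using a hypothetical 64KiB PAGE_SHIFT so the scaling is visible; on x86 both shifts are 12 and the helpers are no-ops:

#include <assert.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT 12              /* VT-d always uses 4KiB pages */
#define PAGE_SHIFT     16              /* hypothetical 64KiB MM pages, for illustration */

/* VT-d pages are never larger than MM pages, so the shift is never negative */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

int main(void)
{
	unsigned long mm_pfn = 3;                     /* third 64KiB page */
	unsigned long dma_pfn = mm_to_dma_pfn(mm_pfn);

	/* one 64KiB MM page covers sixteen 4KiB VT-d pages */
	printf("mm_pfn %lu -> dma_pfn %lu\n", mm_pfn, dma_pfn);
	assert(dma_pfn == 48);
	assert(dma_to_mm_pfn(dma_pfn) == mm_pfn);
	return 0;
}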
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
204 222
205static inline u64 dma_pte_addr(struct dma_pte *pte) 223static inline u64 dma_pte_addr(struct dma_pte *pte)
206{ 224{
207 return (pte->val & VTD_PAGE_MASK); 225#ifdef CONFIG_64BIT
226 return pte->val & VTD_PAGE_MASK;
227#else
228 /* Must have a full atomic 64-bit read */
229 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
230#endif
208} 231}
209 232
210static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 233static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
211{ 234{
212 pte->val |= (addr & VTD_PAGE_MASK); 235 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
213} 236}
214 237
215static inline bool dma_pte_present(struct dma_pte *pte) 238static inline bool dma_pte_present(struct dma_pte *pte)
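On 32-bit kernels a plain load of the 64-bit pte->val could tear against a concurrent cmpxchg64 writer, so dma_pte_addr() now reads it by compare-exchanging 0 against 0: a zero PTE is "replaced" with zero harmlessly, and either way the returned old value is one indivisible 64-bit snapshot. A userspace sketch of the same idiom, using GCC/Clang's __sync_val_compare_and_swap as a stand-in for the kernel's __cmpxchg64:

#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_MASK  (((uint64_t)-1) << VTD_PAGE_SHIFT)

struct dma_pte { uint64_t val; };

/* Atomic 64-bit snapshot of a PTE, usable even on 32-bit hosts:
 * cmpxchg(val, 0, 0) never changes a non-zero PTE and returns the
 * old value as a single atomic read. */
static uint64_t dma_pte_addr(struct dma_pte *pte)
{
	return __sync_val_compare_and_swap(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
}

static void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

int main(void)
{
	struct dma_pte pte = { .val = 0x3 };	/* read + write bits set */

	dma_set_pte_pfn(&pte, 0x12345);
	printf("pte addr = 0x%llx\n", (unsigned long long)dma_pte_addr(&pte));
	return 0;
}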
@@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
217 return (pte->val & 3) != 0; 240 return (pte->val & 3) != 0;
218} 241}
219 242
243static inline int first_pte_in_page(struct dma_pte *pte)
244{
245 return !((unsigned long)pte & ~VTD_PAGE_MASK);
246}
247
220/* 248/*
221 * This domain is a statically identity mapping domain. 249 * This domain is a statically identity mapping domain.
222 * 1. This domain creats a static 1:1 mapping to all usable memory. 250 * 1. This domain creats a static 1:1 mapping to all usable memory.
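first_pte_in_page() is what lets the later clear/map loops batch consecutive PTEs and flush the CPU cache once per page-table page: a PTE pointer whose low VTD_PAGE_SHIFT bits are all zero sits at the start of a fresh table page. A small sketch of that loop shape, assuming 512 eight-byte entries per 4KiB table page and printing where the batched flush would go:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_SIZE  (1UL << VTD_PAGE_SHIFT)
#define VTD_PAGE_MASK  (~(VTD_PAGE_SIZE - 1))

struct dma_pte { uint64_t val; };

/* True when the PTE pointer sits at the start of a page-table page */
static int first_pte_in_page(struct dma_pte *pte)
{
	return !((uintptr_t)pte & ~VTD_PAGE_MASK);
}

int main(void)
{
	/* one page-aligned table of 512 PTEs, as in the real page tables */
	struct dma_pte *table = aligned_alloc(VTD_PAGE_SIZE, VTD_PAGE_SIZE);
	unsigned long nr = VTD_PAGE_SIZE / sizeof(struct dma_pte);
	struct dma_pte *first, *pte;
	unsigned long cleared = 0;

	if (!table)
		return 1;

	first = pte = &table[0];

	/* clear a run of PTEs, stopping when the next pointer would cross
	 * into a new table page -- that is where the batched flush goes */
	do {
		pte->val = 0;
		pte++;
		cleared++;
	} while (cleared < nr && !first_pte_in_page(pte));

	printf("cleared %lu PTEs, flush %zu bytes starting at %p\n",
	       cleared, (size_t)((char *)pte - (char *)first), (void *)first);
	free(table);
	return 0;
}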
@@ -244,7 +272,6 @@ struct dmar_domain {
244 struct iova_domain iovad; /* iova's that belong to this domain */ 272 struct iova_domain iovad; /* iova's that belong to this domain */
245 273
246 struct dma_pte *pgd; /* virtual address */ 274 struct dma_pte *pgd; /* virtual address */
247 spinlock_t mapping_lock; /* page table lock */
248 int gaw; /* max guest address width */ 275 int gaw; /* max guest address width */
249 276
250 /* adjusted guest address width, 0 is level 2 30-bit */ 277 /* adjusted guest address width, 0 is level 2 30-bit */
@@ -648,80 +675,78 @@ static inline int width_to_agaw(int width)
648 675
649static inline unsigned int level_to_offset_bits(int level) 676static inline unsigned int level_to_offset_bits(int level)
650{ 677{
651 return (12 + (level - 1) * LEVEL_STRIDE); 678 return (level - 1) * LEVEL_STRIDE;
652} 679}
653 680
654static inline int address_level_offset(u64 addr, int level) 681static inline int pfn_level_offset(unsigned long pfn, int level)
655{ 682{
656 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); 683 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
657} 684}
658 685
659static inline u64 level_mask(int level) 686static inline unsigned long level_mask(int level)
660{ 687{
661 return ((u64)-1 << level_to_offset_bits(level)); 688 return -1UL << level_to_offset_bits(level);
662} 689}
663 690
664static inline u64 level_size(int level) 691static inline unsigned long level_size(int level)
665{ 692{
666 return ((u64)1 << level_to_offset_bits(level)); 693 return 1UL << level_to_offset_bits(level);
667} 694}
668 695
669static inline u64 align_to_level(u64 addr, int level) 696static inline unsigned long align_to_level(unsigned long pfn, int level)
670{ 697{
671 return ((addr + level_size(level) - 1) & level_mask(level)); 698 return (pfn + level_size(level) - 1) & level_mask(level);
672} 699}
673 700
674static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) 701static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
702 unsigned long pfn)
675{ 703{
676 int addr_width = agaw_to_width(domain->agaw); 704 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
677 struct dma_pte *parent, *pte = NULL; 705 struct dma_pte *parent, *pte = NULL;
678 int level = agaw_to_level(domain->agaw); 706 int level = agaw_to_level(domain->agaw);
679 int offset; 707 int offset;
680 unsigned long flags;
681 708
682 BUG_ON(!domain->pgd); 709 BUG_ON(!domain->pgd);
683 710 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
684 addr &= (((u64)1) << addr_width) - 1;
685 parent = domain->pgd; 711 parent = domain->pgd;
686 712
687 spin_lock_irqsave(&domain->mapping_lock, flags);
688 while (level > 0) { 713 while (level > 0) {
689 void *tmp_page; 714 void *tmp_page;
690 715
691 offset = address_level_offset(addr, level); 716 offset = pfn_level_offset(pfn, level);
692 pte = &parent[offset]; 717 pte = &parent[offset];
693 if (level == 1) 718 if (level == 1)
694 break; 719 break;
695 720
696 if (!dma_pte_present(pte)) { 721 if (!dma_pte_present(pte)) {
722 uint64_t pteval;
723
697 tmp_page = alloc_pgtable_page(); 724 tmp_page = alloc_pgtable_page();
698 725
699 if (!tmp_page) { 726 if (!tmp_page)
700 spin_unlock_irqrestore(&domain->mapping_lock,
701 flags);
702 return NULL; 727 return NULL;
728
729 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
730 pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
731 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
732 /* Someone else set it while we were thinking; use theirs. */
733 free_pgtable_page(tmp_page);
734 } else {
735 dma_pte_addr(pte);
736 domain_flush_cache(domain, pte, sizeof(*pte));
703 } 737 }
704 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
705 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
706 /*
707 * high level table always sets r/w, last level page
708 * table control read/write
709 */
710 dma_set_pte_readable(pte);
711 dma_set_pte_writable(pte);
712 domain_flush_cache(domain, pte, sizeof(*pte));
713 } 738 }
714 parent = phys_to_virt(dma_pte_addr(pte)); 739 parent = phys_to_virt(dma_pte_addr(pte));
715 level--; 740 level--;
716 } 741 }
717 742
718 spin_unlock_irqrestore(&domain->mapping_lock, flags);
719 return pte; 743 return pte;
720} 744}
721 745
722/* return address's pte at specific level */ 746/* return address's pte at specific level */
723static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, 747static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
724 int level) 748 unsigned long pfn,
749 int level)
725{ 750{
726 struct dma_pte *parent, *pte = NULL; 751 struct dma_pte *parent, *pte = NULL;
727 int total = agaw_to_level(domain->agaw); 752 int total = agaw_to_level(domain->agaw);
@@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
729 754
730 parent = domain->pgd; 755 parent = domain->pgd;
731 while (level <= total) { 756 while (level <= total) {
732 offset = address_level_offset(addr, total); 757 offset = pfn_level_offset(pfn, total);
733 pte = &parent[offset]; 758 pte = &parent[offset];
734 if (level == total) 759 if (level == total)
735 return pte; 760 return pte;
@@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
742 return NULL; 767 return NULL;
743} 768}
744 769
745/* clear one page's page table */
746static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
747{
748 struct dma_pte *pte = NULL;
749
750 /* get last level pte */
751 pte = dma_addr_level_pte(domain, addr, 1);
752
753 if (pte) {
754 dma_clear_pte(pte);
755 domain_flush_cache(domain, pte, sizeof(*pte));
756 }
757}
758
759/* clear last level pte, a tlb flush should be followed */ 770/* clear last level pte, a tlb flush should be followed */
760static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) 771static void dma_pte_clear_range(struct dmar_domain *domain,
772 unsigned long start_pfn,
773 unsigned long last_pfn)
761{ 774{
762 int addr_width = agaw_to_width(domain->agaw); 775 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
763 int npages; 776 struct dma_pte *first_pte, *pte;
777
778 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
779 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
764 780
765 start &= (((u64)1) << addr_width) - 1; 781 /* we don't need lock here; nobody else touches the iova range */
766 end &= (((u64)1) << addr_width) - 1; 782 while (start_pfn <= last_pfn) {
767 /* in case it's partial page */ 783 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
768 start &= PAGE_MASK; 784 if (!pte) {
769 end = PAGE_ALIGN(end); 785 start_pfn = align_to_level(start_pfn + 1, 2);
770 npages = (end - start) / VTD_PAGE_SIZE; 786 continue;
787 }
788 do {
789 dma_clear_pte(pte);
790 start_pfn++;
791 pte++;
792 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
771 793
772 /* we don't need lock here, nobody else touches the iova range */ 794 domain_flush_cache(domain, first_pte,
773 while (npages--) { 795 (void *)pte - (void *)first_pte);
774 dma_pte_clear_one(domain, start);
775 start += VTD_PAGE_SIZE;
776 } 796 }
777} 797}
778 798
779/* free page table pages. last level pte should already be cleared */ 799/* free page table pages. last level pte should already be cleared */
780static void dma_pte_free_pagetable(struct dmar_domain *domain, 800static void dma_pte_free_pagetable(struct dmar_domain *domain,
781 u64 start, u64 end) 801 unsigned long start_pfn,
802 unsigned long last_pfn)
782{ 803{
783 int addr_width = agaw_to_width(domain->agaw); 804 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
784 struct dma_pte *pte; 805 struct dma_pte *first_pte, *pte;
785 int total = agaw_to_level(domain->agaw); 806 int total = agaw_to_level(domain->agaw);
786 int level; 807 int level;
787 u64 tmp; 808 unsigned long tmp;
788 809
789 start &= (((u64)1) << addr_width) - 1; 810 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
790 end &= (((u64)1) << addr_width) - 1; 811 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
791 812
792 /* we don't need lock here, nobody else touches the iova range */ 813 /* We don't need lock here; nobody else touches the iova range */
793 level = 2; 814 level = 2;
794 while (level <= total) { 815 while (level <= total) {
795 tmp = align_to_level(start, level); 816 tmp = align_to_level(start_pfn, level);
796 if (tmp >= end || (tmp + level_size(level) > end)) 817
818 /* If we can't even clear one PTE at this level, we're done */
819 if (tmp + level_size(level) - 1 > last_pfn)
797 return; 820 return;
798 821
799 while (tmp < end) { 822 while (tmp + level_size(level) - 1 <= last_pfn) {
800 pte = dma_addr_level_pte(domain, tmp, level); 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
801 if (pte) { 824 if (!pte) {
802 free_pgtable_page( 825 tmp = align_to_level(tmp + 1, level + 1);
803 phys_to_virt(dma_pte_addr(pte))); 826 continue;
804 dma_clear_pte(pte);
805 domain_flush_cache(domain, pte, sizeof(*pte));
806 } 827 }
807 tmp += level_size(level); 828 do {
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
833 pte++;
834 tmp += level_size(level);
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
808 } 841 }
809 level++; 842 level++;
810 } 843 }
811 /* free pgd */ 844 /* free pgd */
812 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
813 free_pgtable_page(domain->pgd); 846 free_pgtable_page(domain->pgd);
814 domain->pgd = NULL; 847 domain->pgd = NULL;
815 } 848 }
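With addresses replaced by VT-d pfns, level_to_offset_bits() no longer includes the 12-bit page offset, and pfn_to_dma_pte() indexes each level of the table with a 9-bit slice of the pfn. A standalone sketch of the index arithmetic, assuming LEVEL_STRIDE of 9 and LEVEL_MASK of 0x1ff (512 entries per level, as in the driver) and a 39-bit guest address width, i.e. a 3-level table:

#include <stdio.h>

#define LEVEL_STRIDE 9
#define LEVEL_MASK   ((1UL << LEVEL_STRIDE) - 1)   /* 0x1ff */

/* Offset-bit count within the pfn for a given level; the 12-bit page
 * offset is already gone because we operate on pfns, not addresses. */
static unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

int main(void)
{
	unsigned long pfn = 0x12345;
	int level;

	for (level = 3; level >= 1; level--)
		printf("level %d index = %d\n", level, pfn_level_offset(pfn, level));

	/* next pfn that starts a fresh level-2 (2MiB) block */
	printf("align_to_level(0x%lx, 2) = 0x%lx\n", pfn, align_to_level(pfn, 2));
	return 0;
}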
@@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1035} 1068}
1036 1069
1037static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1038 u64 addr, unsigned int pages) 1071 unsigned long pfn, unsigned int pages)
1039{ 1072{
1040 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1041 1075
1042 BUG_ON(addr & (~VTD_PAGE_MASK));
1043 BUG_ON(pages == 0); 1076 BUG_ON(pages == 0);
1044 1077
1045 /* 1078 /*
@@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1054 else 1087 else
1055 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1056 DMA_TLB_PSI_FLUSH); 1089 DMA_TLB_PSI_FLUSH);
1057 if (did) 1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1058 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1059} 1097}
1060 1098
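The PSI flush now takes a pfn and a page count; the invalidation mask is the log2 of the page count rounded up to a power of two, i.e. how many low bits of the page address the hardware should ignore. A quick sketch of that mask computation, with a plain loop standing in for the kernel's ilog2()/__roundup_pow_of_two helpers:

#include <stdio.h>

/* mask = ilog2(roundup_pow_of_two(pages)): how many low bits of the page
 * address the IOTLB invalidation treats as "don't care" */
static unsigned int psi_mask(unsigned int pages)
{
	unsigned int mask = 0;

	while ((1U << mask) < pages)
		mask++;
	return mask;
}

int main(void)
{
	unsigned int pages[] = { 1, 2, 3, 4, 5, 8, 9 };
	unsigned int i;

	for (i = 0; i < sizeof(pages) / sizeof(pages[0]); i++)
		printf("pages=%u -> mask=%u (flush covers %u pages)\n",
		       pages[i], psi_mask(pages[i]), 1U << psi_mask(pages[i]));
	return 0;
}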
@@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1279 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1280 struct iova *iova; 1318 struct iova *iova;
1281 int i; 1319 int i;
1282 u64 addr, size;
1283 1320
1284 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1285 1322
@@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1302 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1303 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1304 continue; 1341 continue;
1305 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1306 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1307 size = r->end - addr; 1344 IOVA_PFN(r->end));
1308 size = PAGE_ALIGN(size);
1309 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1310 IOVA_PFN(size + addr) - 1);
1311 if (!iova) 1345 if (!iova)
1312 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1313 } 1347 }
@@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1341 unsigned long sagaw; 1375 unsigned long sagaw;
1342 1376
1343 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1344 spin_lock_init(&domain->mapping_lock);
1345 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1346 1379
1347 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
1388{ 1421{
1389 struct dmar_drhd_unit *drhd; 1422 struct dmar_drhd_unit *drhd;
1390 struct intel_iommu *iommu; 1423 struct intel_iommu *iommu;
1391 u64 end;
1392 1424
1393 /* Domain 0 is reserved, so dont process it */ 1425 /* Domain 0 is reserved, so dont process it */
1394 if (!domain) 1426 if (!domain)
@@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
1397 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1398 /* destroy iovas */ 1430 /* destroy iovas */
1399 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1400 end = DOMAIN_MAX_ADDR(domain->gaw);
1401 end = end & (~PAGE_MASK);
1402 1432
1403 /* clear ptes */ 1433 /* clear ptes */
1404 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1405 1435
1406 /* free page tables */ 1436 /* free page tables */
1407 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1408 1438
1409 for_each_active_iommu(iommu, drhd) 1439 for_each_active_iommu(iommu, drhd)
1410 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
1618 tmp->devfn); 1648 tmp->devfn);
1619} 1649}
1620 1650
1621static int 1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1622domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1652 struct scatterlist *sg, unsigned long phys_pfn,
1623 u64 hpa, size_t size, int prot) 1653 unsigned long nr_pages, int prot)
1624{ 1654{
1625 u64 start_pfn, end_pfn; 1655 struct dma_pte *first_pte = NULL, *pte = NULL;
1626 struct dma_pte *pte; 1656 phys_addr_t uninitialized_var(pteval);
1627 int index; 1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1628 int addr_width = agaw_to_width(domain->agaw); 1658 unsigned long sg_res;
1629 1659
1630 hpa &= (((u64)1) << addr_width) - 1; 1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1631 1661
1632 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1662 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1633 return -EINVAL; 1663 return -EINVAL;
1634 iova &= PAGE_MASK; 1664
1635 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1665 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1636 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1666
1637 index = 0; 1667 if (sg)
1638 while (start_pfn < end_pfn) { 1668 sg_res = 0;
1639 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1669 else {
1640 if (!pte) 1670 sg_res = nr_pages + 1;
1641 return -ENOMEM; 1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 }
1673
1674 while (nr_pages--) {
1675 uint64_t tmp;
1676
1677 if (!sg_res) {
1678 sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot;
1682 }
1683 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1685 if (!pte)
1686 return -ENOMEM;
1687 }
1642 /* We don't need lock here, nobody else 1688 /* We don't need lock here, nobody else
1643 * touches the iova range 1689 * touches the iova range
1644 */ 1690 */
1645 BUG_ON(dma_pte_addr(pte)); 1691 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1646 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1692 if (tmp) {
1647 dma_set_pte_prot(pte, prot); 1693 static int dumps = 5;
1648 if (prot & DMA_PTE_SNP) 1694 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1649 dma_set_pte_snp(pte); 1695 iov_pfn, tmp, (unsigned long long)pteval);
1650 domain_flush_cache(domain, pte, sizeof(*pte)); 1696 if (dumps) {
1651 start_pfn++; 1697 dumps--;
1652 index++; 1698 debug_dma_dump_mappings(NULL);
1699 }
1700 WARN_ON(1);
1701 }
1702 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) {
1704 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte);
1706 pte = NULL;
1707 }
1708 iov_pfn++;
1709 pteval += VTD_PAGE_SIZE;
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg);
1653 } 1713 }
1654 return 0; 1714 return 0;
1655} 1715}
1656 1716
1717static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1718 struct scatterlist *sg, unsigned long nr_pages,
1719 int prot)
1720{
1721 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1722}
1723
1724static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 unsigned long phys_pfn, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1729}
1730
1657static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1731static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1658{ 1732{
1659 if (!iommu) 1733 if (!iommu)
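For scatterlist mappings, __domain_mapping() consumes one sg entry at a time: sg_res tracks how many VT-d pages the current entry still needs, derived from its offset and length, and the entry's dma_address is the current IOVA pfn shifted up plus the intra-page offset. A toy sketch of that bookkeeping over a made-up two-entry list, with the PTE writes and cache flushes elided and a simplified stand-in for struct scatterlist:

#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_SIZE  (1UL << VTD_PAGE_SHIFT)

/* simplified stand-in for struct scatterlist */
struct sg {
	unsigned int offset, length;
	uint64_t dma_address;
	unsigned int dma_length;
};

int main(void)
{
	struct sg sglist[] = {
		{ .offset = 0x800, .length = 0x1000 },  /* straddles two pages */
		{ .offset = 0x000, .length = 0x2000 },  /* exactly two pages   */
	};
	unsigned long iov_pfn = 0x5000;
	unsigned long sg_res = 0;
	unsigned int i = 0;

	while (i < 2) {
		struct sg *s = &sglist[i];

		if (!sg_res) {
			/* pages needed to cover offset+length of this entry */
			sg_res = (s->offset + s->length + VTD_PAGE_SIZE - 1)
					>> VTD_PAGE_SHIFT;
			s->dma_address = ((uint64_t)iov_pfn << VTD_PAGE_SHIFT)
					+ s->offset;
			s->dma_length = s->length;
			printf("sg[%u]: %lu pages, dma_address=0x%llx\n",
			       i, sg_res, (unsigned long long)s->dma_address);
		}
		/* ...one PTE would be written for iov_pfn here... */
		iov_pfn++;
		if (!--sg_res)
			i++;
	}
	return 0;
}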
@@ -1844,58 +1918,61 @@ error:
1844 1918
1845static int iommu_identity_mapping; 1919static int iommu_identity_mapping;
1846 1920
1921static int iommu_domain_identity_map(struct dmar_domain *domain,
1922 unsigned long long start,
1923 unsigned long long end)
1924{
1925 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1926 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1927
1928 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1929 dma_to_mm_pfn(last_vpfn))) {
1930 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1931 return -ENOMEM;
1932 }
1933
1934 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1935 start, end, domain->id);
1936 /*
1937 * RMRR range might have overlap with physical memory range,
1938 * clear it first
1939 */
1940 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1941
1942 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1943 last_vpfn - first_vpfn + 1,
1944 DMA_PTE_READ|DMA_PTE_WRITE);
1945}
1946
1847static int iommu_prepare_identity_map(struct pci_dev *pdev, 1947static int iommu_prepare_identity_map(struct pci_dev *pdev,
1848 unsigned long long start, 1948 unsigned long long start,
1849 unsigned long long end) 1949 unsigned long long end)
1850{ 1950{
1851 struct dmar_domain *domain; 1951 struct dmar_domain *domain;
1852 unsigned long size;
1853 unsigned long long base;
1854 int ret; 1952 int ret;
1855 1953
1856 printk(KERN_INFO 1954 printk(KERN_INFO
1857 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1955 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1858 pci_name(pdev), start, end); 1956 pci_name(pdev), start, end);
1859 if (iommu_identity_mapping) 1957
1860 domain = si_domain; 1958 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1861 else
1862 /* page table init */
1863 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1864 if (!domain) 1959 if (!domain)
1865 return -ENOMEM; 1960 return -ENOMEM;
1866 1961
1867 /* The address might not be aligned */ 1962 ret = iommu_domain_identity_map(domain, start, end);
1868 base = start & PAGE_MASK;
1869 size = end - base;
1870 size = PAGE_ALIGN(size);
1871 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1872 IOVA_PFN(base + size) - 1)) {
1873 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1874 ret = -ENOMEM;
1875 goto error;
1876 }
1877
1878 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1879 size, base, pci_name(pdev));
1880 /*
1881 * RMRR range might have overlap with physical memory range,
1882 * clear it first
1883 */
1884 dma_pte_clear_range(domain, base, base + size);
1885
1886 ret = domain_page_mapping(domain, base, base, size,
1887 DMA_PTE_READ|DMA_PTE_WRITE);
1888 if (ret) 1963 if (ret)
1889 goto error; 1964 goto error;
1890 1965
1891 /* context entry init */ 1966 /* context entry init */
1892 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 1967 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1893 if (!ret) 1968 if (ret)
1894 return 0; 1969 goto error;
1895error: 1970
1971 return 0;
1972
1973 error:
1896 domain_exit(domain); 1974 domain_exit(domain);
1897 return ret; 1975 return ret;
1898
1899} 1976}
1900 1977
1901static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1978static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1907 rmrr->end_address + 1); 1984 rmrr->end_address + 1);
1908} 1985}
1909 1986
1910struct iommu_prepare_data {
1911 struct pci_dev *pdev;
1912 int ret;
1913};
1914
1915static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1916 unsigned long end_pfn, void *datax)
1917{
1918 struct iommu_prepare_data *data;
1919
1920 data = (struct iommu_prepare_data *)datax;
1921
1922 data->ret = iommu_prepare_identity_map(data->pdev,
1923 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1924 return data->ret;
1925
1926}
1927
1928static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1929{
1930 int nid;
1931 struct iommu_prepare_data data;
1932
1933 data.pdev = pdev;
1934 data.ret = 0;
1935
1936 for_each_online_node(nid) {
1937 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1938 if (data.ret)
1939 return data.ret;
1940 }
1941 return data.ret;
1942}
1943
1944#ifdef CONFIG_DMAR_GFX_WA
1945static void __init iommu_prepare_gfx_mapping(void)
1946{
1947 struct pci_dev *pdev = NULL;
1948 int ret;
1949
1950 for_each_pci_dev(pdev) {
1951 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1952 !IS_GFX_DEVICE(pdev))
1953 continue;
1954 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1955 pci_name(pdev));
1956 ret = iommu_prepare_with_active_regions(pdev);
1957 if (ret)
1958 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1959 }
1960}
1961#else /* !CONFIG_DMAR_GFX_WA */
1962static inline void iommu_prepare_gfx_mapping(void)
1963{
1964 return;
1965}
1966#endif
1967
1968#ifdef CONFIG_DMAR_FLOPPY_WA 1987#ifdef CONFIG_DMAR_FLOPPY_WA
1969static inline void iommu_prepare_isa(void) 1988static inline void iommu_prepare_isa(void)
1970{ 1989{
@@ -1975,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
1975 if (!pdev) 1994 if (!pdev)
1976 return; 1995 return;
1977 1996
1978 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 1997 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1979 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 1998 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1980 1999
1981 if (ret) 2000 if (ret)
1982 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2001 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1983 "floppy might not work\n"); 2002 "floppy might not work\n");
1984 2003
1985} 2004}
1986#else 2005#else
@@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void)
2008} 2027}
2009 2028
2010static int md_domain_init(struct dmar_domain *domain, int guest_width); 2029static int md_domain_init(struct dmar_domain *domain, int guest_width);
2030
2031static int __init si_domain_work_fn(unsigned long start_pfn,
2032 unsigned long end_pfn, void *datax)
2033{
2034 int *ret = datax;
2035
2036 *ret = iommu_domain_identity_map(si_domain,
2037 (uint64_t)start_pfn << PAGE_SHIFT,
2038 (uint64_t)end_pfn << PAGE_SHIFT);
2039 return *ret;
2040
2041}
2042
2011static int si_domain_init(void) 2043static int si_domain_init(void)
2012{ 2044{
2013 struct dmar_drhd_unit *drhd; 2045 struct dmar_drhd_unit *drhd;
2014 struct intel_iommu *iommu; 2046 struct intel_iommu *iommu;
2015 int ret = 0; 2047 int nid, ret = 0;
2016 2048
2017 si_domain = alloc_domain(); 2049 si_domain = alloc_domain();
2018 if (!si_domain) 2050 if (!si_domain)
2019 return -EFAULT; 2051 return -EFAULT;
2020 2052
2053 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2021 2054
2022 for_each_active_iommu(iommu, drhd) { 2055 for_each_active_iommu(iommu, drhd) {
2023 ret = iommu_attach_domain(si_domain, iommu); 2056 ret = iommu_attach_domain(si_domain, iommu);
@@ -2034,6 +2067,12 @@ static int si_domain_init(void)
2034 2067
2035 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2068 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2036 2069
2070 for_each_online_node(nid) {
2071 work_with_active_regions(nid, si_domain_work_fn, &ret);
2072 if (ret)
2073 return ret;
2074 }
2075
2037 return 0; 2076 return 0;
2038} 2077}
2039 2078
@@ -2087,13 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void)
2087 if (ret) 2126 if (ret)
2088 return -EFAULT; 2127 return -EFAULT;
2089 2128
2090 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2091 for_each_pci_dev(pdev) { 2129 for_each_pci_dev(pdev) {
2092 ret = iommu_prepare_with_active_regions(pdev); 2130 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2093 if (ret) { 2131 pci_name(pdev));
2094 printk(KERN_INFO "1:1 mapping to one domain failed.\n"); 2132
2095 return -EFAULT; 2133 ret = domain_context_mapping(si_domain, pdev,
2096 } 2134 CONTEXT_TT_MULTI_LEVEL);
2135 if (ret)
2136 return ret;
2097 ret = domain_add_dev_info(si_domain, pdev); 2137 ret = domain_add_dev_info(si_domain, pdev);
2098 if (ret) 2138 if (ret)
2099 return ret; 2139 return ret;
@@ -2284,8 +2324,6 @@ int __init init_dmars(void)
2284 } 2324 }
2285 } 2325 }
2286 2326
2287 iommu_prepare_gfx_mapping();
2288
2289 iommu_prepare_isa(); 2327 iommu_prepare_isa();
2290 } 2328 }
2291 2329
@@ -2330,50 +2368,40 @@ error:
2330 return ret; 2368 return ret;
2331} 2369}
2332 2370
2333static inline u64 aligned_size(u64 host_addr, size_t size) 2371static inline unsigned long aligned_nrpages(unsigned long host_addr,
2334{ 2372 size_t size)
2335 u64 addr;
2336 addr = (host_addr & (~PAGE_MASK)) + size;
2337 return PAGE_ALIGN(addr);
2338}
2339
2340struct iova *
2341iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2342{ 2373{
2343 struct iova *piova; 2374 host_addr &= ~PAGE_MASK;
2375 host_addr += size + PAGE_SIZE - 1;
2344 2376
2345 /* Make sure it's in range */ 2377 return host_addr >> VTD_PAGE_SHIFT;
2346 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2347 if (!size || (IOVA_START_ADDR + size > end))
2348 return NULL;
2349
2350 piova = alloc_iova(&domain->iovad,
2351 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2352 return piova;
2353} 2378}
2354 2379
2355static struct iova * 2380static struct iova *intel_alloc_iova(struct device *dev,
2356__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, 2381 struct dmar_domain *domain,
2357 size_t size, u64 dma_mask) 2382 unsigned long nrpages, uint64_t dma_mask)
2358{ 2383{
2359 struct pci_dev *pdev = to_pci_dev(dev); 2384 struct pci_dev *pdev = to_pci_dev(dev);
2360 struct iova *iova = NULL; 2385 struct iova *iova = NULL;
2361 2386
2362 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2387 /* Restrict dma_mask to the width that the iommu can handle */
2363 iova = iommu_alloc_iova(domain, size, dma_mask); 2388 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2364 else { 2389
2390 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2365 /* 2391 /*
2366 * First try to allocate an io virtual address in 2392 * First try to allocate an io virtual address in
2367 * DMA_BIT_MASK(32) and if that fails then try allocating 2393 * DMA_BIT_MASK(32) and if that fails then try allocating
2368 * from higher range 2394 * from higher range
2369 */ 2395 */
2370 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2396 iova = alloc_iova(&domain->iovad, nrpages,
2371 if (!iova) 2397 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2372 iova = iommu_alloc_iova(domain, size, dma_mask); 2398 if (iova)
2373 } 2399 return iova;
2374 2400 }
2375 if (!iova) { 2401 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2376 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2402 if (unlikely(!iova)) {
2403 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2404 nrpages, pci_name(pdev));
2377 return NULL; 2405 return NULL;
2378 } 2406 }
2379 2407
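aligned_nrpages() replaces aligned_size(): it returns how many pages are needed to cover a buffer once its in-page offset is included. A sketch of the arithmetic; note it rounds with the MM PAGE_* constants but counts in VTD_PAGE_SHIFT units, so the sketch pins both shifts to 12 (as on x86, where the two page sizes coincide):

#include <stdio.h>

#define PAGE_SHIFT     12
#define PAGE_SIZE      (1UL << PAGE_SHIFT)
#define PAGE_MASK      (~(PAGE_SIZE - 1))
#define VTD_PAGE_SHIFT 12   /* same as PAGE_SHIFT here */

static unsigned long aligned_nrpages(unsigned long host_addr, unsigned long size)
{
	host_addr &= ~PAGE_MASK;              /* keep only the in-page offset */
	host_addr += size + PAGE_SIZE - 1;    /* round offset+size up to a page */
	return host_addr >> VTD_PAGE_SHIFT;   /* ...and count pages */
}

int main(void)
{
	/* 8KiB buffer starting 0x200 into a page needs three pages */
	printf("%lu\n", aligned_nrpages(0x1200, 0x2000));
	/* page-aligned 4KiB buffer needs exactly one */
	printf("%lu\n", aligned_nrpages(0x3000, 0x1000));
	return 0;
}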
@@ -2476,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2476 return 0; 2504 return 0;
2477 2505
2478 iommu = domain_get_iommu(domain); 2506 iommu = domain_get_iommu(domain);
2479 size = aligned_size((u64)paddr, size); 2507 size = aligned_nrpages(paddr, size);
2480 2508
2481 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2509 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2482 if (!iova) 2510 if (!iova)
2483 goto error; 2511 goto error;
2484 2512
2485 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2486
2487 /* 2513 /*
2488 * Check if DMAR supports zero-length reads on write only 2514 * Check if DMAR supports zero-length reads on write only
2489 * mappings.. 2515 * mappings..
@@ -2499,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2499 * might have two guest_addr mapping to the same host paddr, but this 2525 * might have two guest_addr mapping to the same host paddr, but this
2500 * is not a big problem 2526 * is not a big problem
2501 */ 2527 */
2502 ret = domain_page_mapping(domain, start_paddr, 2528 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2503 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2529 paddr >> VTD_PAGE_SHIFT, size, prot);
2504 size, prot);
2505 if (ret) 2530 if (ret)
2506 goto error; 2531 goto error;
2507 2532
2508 /* it's a non-present to present mapping. Only flush if caching mode */ 2533 /* it's a non-present to present mapping. Only flush if caching mode */
2509 if (cap_caching_mode(iommu->cap)) 2534 if (cap_caching_mode(iommu->cap))
2510 iommu_flush_iotlb_psi(iommu, 0, start_paddr, 2535 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2511 size >> VTD_PAGE_SHIFT);
2512 else 2536 else
2513 iommu_flush_write_buffer(iommu); 2537 iommu_flush_write_buffer(iommu);
2514 2538
2515 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2539 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2540 start_paddr += paddr & ~PAGE_MASK;
2541 return start_paddr;
2516 2542
2517error: 2543error:
2518 if (iova) 2544 if (iova)
@@ -2605,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2605{ 2631{
2606 struct pci_dev *pdev = to_pci_dev(dev); 2632 struct pci_dev *pdev = to_pci_dev(dev);
2607 struct dmar_domain *domain; 2633 struct dmar_domain *domain;
2608 unsigned long start_addr; 2634 unsigned long start_pfn, last_pfn;
2609 struct iova *iova; 2635 struct iova *iova;
2610 struct intel_iommu *iommu; 2636 struct intel_iommu *iommu;
2611 2637
@@ -2618,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2618 iommu = domain_get_iommu(domain); 2644 iommu = domain_get_iommu(domain);
2619 2645
2620 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2646 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2621 if (!iova) 2647 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2648 (unsigned long long)dev_addr))
2622 return; 2649 return;
2623 2650
2624 start_addr = iova->pfn_lo << PAGE_SHIFT; 2651 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2625 size = aligned_size((u64)dev_addr, size); 2652 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2626 2653
2627 pr_debug("Device %s unmapping: %zx@%llx\n", 2654 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2628 pci_name(pdev), size, (unsigned long long)start_addr); 2655 pci_name(pdev), start_pfn, last_pfn);
2629 2656
2630 /* clear the whole page */ 2657 /* clear the whole page */
2631 dma_pte_clear_range(domain, start_addr, start_addr + size); 2658 dma_pte_clear_range(domain, start_pfn, last_pfn);
2659
2632 /* free page tables */ 2660 /* free page tables */
2633 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2661 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2662
2634 if (intel_iommu_strict) { 2663 if (intel_iommu_strict) {
2635 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2664 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2636 size >> VTD_PAGE_SHIFT); 2665 last_pfn - start_pfn + 1);
2637 /* free iova */ 2666 /* free iova */
2638 __free_iova(&domain->iovad, iova); 2667 __free_iova(&domain->iovad, iova);
2639 } else { 2668 } else {
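The iova allocator still works in MM page frames, so the unmap path converts iova->pfn_lo/pfn_hi into an inclusive VT-d pfn range before clearing PTEs: the last dma pfn is the first pfn past the allocation, minus one. A sketch of that conversion, again with the hypothetical 64KiB MM page from the first sketch so the scaling is visible:

#include <stdio.h>

#define VTD_PAGE_SHIFT 12
#define PAGE_SHIFT     16   /* hypothetical 64KiB MM pages, as in the first sketch */

static unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

int main(void)
{
	/* an iova allocation spanning MM pfns 8..9 (two 64KiB pages) */
	unsigned long pfn_lo = 8, pfn_hi = 9;

	unsigned long start_pfn = mm_to_dma_pfn(pfn_lo);
	unsigned long last_pfn  = mm_to_dma_pfn(pfn_hi + 1) - 1;

	/* 2 * 16 = 32 VT-d pages, inclusive range 128..159 */
	printf("dma pfns %lu..%lu (%lu pages)\n",
	       start_pfn, last_pfn, last_pfn - start_pfn + 1);
	return 0;
}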
@@ -2691,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2691 int nelems, enum dma_data_direction dir, 2720 int nelems, enum dma_data_direction dir,
2692 struct dma_attrs *attrs) 2721 struct dma_attrs *attrs)
2693{ 2722{
2694 int i;
2695 struct pci_dev *pdev = to_pci_dev(hwdev); 2723 struct pci_dev *pdev = to_pci_dev(hwdev);
2696 struct dmar_domain *domain; 2724 struct dmar_domain *domain;
2697 unsigned long start_addr; 2725 unsigned long start_pfn, last_pfn;
2698 struct iova *iova; 2726 struct iova *iova;
2699 size_t size = 0;
2700 phys_addr_t addr;
2701 struct scatterlist *sg;
2702 struct intel_iommu *iommu; 2727 struct intel_iommu *iommu;
2703 2728
2704 if (iommu_no_mapping(pdev)) 2729 if (iommu_no_mapping(pdev))
@@ -2710,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2710 iommu = domain_get_iommu(domain); 2735 iommu = domain_get_iommu(domain);
2711 2736
2712 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2737 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2713 if (!iova) 2738 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2739 (unsigned long long)sglist[0].dma_address))
2714 return; 2740 return;
2715 for_each_sg(sglist, sg, nelems, i) {
2716 addr = page_to_phys(sg_page(sg)) + sg->offset;
2717 size += aligned_size((u64)addr, sg->length);
2718 }
2719 2741
2720 start_addr = iova->pfn_lo << PAGE_SHIFT; 2742 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2743 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2721 2744
2722 /* clear the whole page */ 2745 /* clear the whole page */
2723 dma_pte_clear_range(domain, start_addr, start_addr + size); 2746 dma_pte_clear_range(domain, start_pfn, last_pfn);
2747
2724 /* free page tables */ 2748 /* free page tables */
2725 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2749 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2726 2750
2727 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2751 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2728 size >> VTD_PAGE_SHIFT); 2752 (last_pfn - start_pfn + 1));
2729 2753
2730 /* free iova */ 2754 /* free iova */
2731 __free_iova(&domain->iovad, iova); 2755 __free_iova(&domain->iovad, iova);
@@ -2748,17 +2772,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2748static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2772static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2749 enum dma_data_direction dir, struct dma_attrs *attrs) 2773 enum dma_data_direction dir, struct dma_attrs *attrs)
2750{ 2774{
2751 phys_addr_t addr;
2752 int i; 2775 int i;
2753 struct pci_dev *pdev = to_pci_dev(hwdev); 2776 struct pci_dev *pdev = to_pci_dev(hwdev);
2754 struct dmar_domain *domain; 2777 struct dmar_domain *domain;
2755 size_t size = 0; 2778 size_t size = 0;
2756 int prot = 0; 2779 int prot = 0;
2757 size_t offset = 0; 2780 size_t offset_pfn = 0;
2758 struct iova *iova = NULL; 2781 struct iova *iova = NULL;
2759 int ret; 2782 int ret;
2760 struct scatterlist *sg; 2783 struct scatterlist *sg;
2761 unsigned long start_addr; 2784 unsigned long start_vpfn;
2762 struct intel_iommu *iommu; 2785 struct intel_iommu *iommu;
2763 2786
2764 BUG_ON(dir == DMA_NONE); 2787 BUG_ON(dir == DMA_NONE);
@@ -2771,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2771 2794
2772 iommu = domain_get_iommu(domain); 2795 iommu = domain_get_iommu(domain);
2773 2796
2774 for_each_sg(sglist, sg, nelems, i) { 2797 for_each_sg(sglist, sg, nelems, i)
2775 addr = page_to_phys(sg_page(sg)) + sg->offset; 2798 size += aligned_nrpages(sg->offset, sg->length);
2776 size += aligned_size((u64)addr, sg->length);
2777 }
2778 2799
2779 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2800 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2780 if (!iova) { 2801 if (!iova) {
2781 sglist->dma_length = 0; 2802 sglist->dma_length = 0;
2782 return 0; 2803 return 0;
@@ -2792,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2792 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2813 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2793 prot |= DMA_PTE_WRITE; 2814 prot |= DMA_PTE_WRITE;
2794 2815
2795 start_addr = iova->pfn_lo << PAGE_SHIFT; 2816 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2796 offset = 0; 2817
2797 for_each_sg(sglist, sg, nelems, i) { 2818 ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
2798 addr = page_to_phys(sg_page(sg)) + sg->offset; 2819 if (unlikely(ret)) {
2799 size = aligned_size((u64)addr, sg->length); 2820 /* clear the page */
2800 ret = domain_page_mapping(domain, start_addr + offset, 2821 dma_pte_clear_range(domain, start_vpfn,
2801 ((u64)addr) & PHYSICAL_PAGE_MASK, 2822 start_vpfn + size - 1);
2802 size, prot); 2823 /* free page tables */
2803 if (ret) { 2824 dma_pte_free_pagetable(domain, start_vpfn,
2804 /* clear the page */ 2825 start_vpfn + size - 1);
2805 dma_pte_clear_range(domain, start_addr, 2826 /* free iova */
2806 start_addr + offset); 2827 __free_iova(&domain->iovad, iova);
2807 /* free page tables */ 2828 return 0;
2808 dma_pte_free_pagetable(domain, start_addr,
2809 start_addr + offset);
2810 /* free iova */
2811 __free_iova(&domain->iovad, iova);
2812 return 0;
2813 }
2814 sg->dma_address = start_addr + offset +
2815 ((u64)addr & (~PAGE_MASK));
2816 sg->dma_length = sg->length;
2817 offset += size;
2818 } 2829 }
2819 2830
2820 /* it's a non-present to present mapping. Only flush if caching mode */ 2831 /* it's a non-present to present mapping. Only flush if caching mode */
2821 if (cap_caching_mode(iommu->cap)) 2832 if (cap_caching_mode(iommu->cap))
2822 iommu_flush_iotlb_psi(iommu, 0, start_addr, 2833 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2823 offset >> VTD_PAGE_SHIFT);
2824 else 2834 else
2825 iommu_flush_write_buffer(iommu); 2835 iommu_flush_write_buffer(iommu);
2826 2836
@@ -3325,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3325 int adjust_width; 3335 int adjust_width;
3326 3336
3327 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3337 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3328 spin_lock_init(&domain->mapping_lock);
3329 spin_lock_init(&domain->iommu_lock); 3338 spin_lock_init(&domain->iommu_lock);
3330 3339
3331 domain_reserve_special_ranges(domain); 3340 domain_reserve_special_ranges(domain);
@@ -3379,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3379 3388
3380static void vm_domain_exit(struct dmar_domain *domain) 3389static void vm_domain_exit(struct dmar_domain *domain)
3381{ 3390{
3382 u64 end;
3383
3384 /* Domain 0 is reserved, so dont process it */ 3391 /* Domain 0 is reserved, so dont process it */
3385 if (!domain) 3392 if (!domain)
3386 return; 3393 return;
@@ -3388,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3388 vm_domain_remove_all_dev_info(domain); 3395 vm_domain_remove_all_dev_info(domain);
3389 /* destroy iovas */ 3396 /* destroy iovas */
3390 put_iova_domain(&domain->iovad); 3397 put_iova_domain(&domain->iovad);
3391 end = DOMAIN_MAX_ADDR(domain->gaw);
3392 end = end & (~VTD_PAGE_MASK);
3393 3398
3394 /* clear ptes */ 3399 /* clear ptes */
3395 dma_pte_clear_range(domain, 0, end); 3400 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3396 3401
3397 /* free page tables */ 3402 /* free page tables */
3398 dma_pte_free_pagetable(domain, 0, end); 3403 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3399 3404
3400 iommu_free_vm_domain(domain); 3405 iommu_free_vm_domain(domain);
3401 free_domain_mem(domain); 3406 free_domain_mem(domain);
@@ -3504,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3504 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3509 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3505 prot |= DMA_PTE_SNP; 3510 prot |= DMA_PTE_SNP;
3506 3511
3507 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3512 max_addr = iova + size;
3508 if (dmar_domain->max_addr < max_addr) { 3513 if (dmar_domain->max_addr < max_addr) {
3509 int min_agaw; 3514 int min_agaw;
3510 u64 end; 3515 u64 end;
@@ -3522,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3522 } 3527 }
3523 dmar_domain->max_addr = max_addr; 3528 dmar_domain->max_addr = max_addr;
3524 } 3529 }
3525 3530 /* Round up size to next multiple of PAGE_SIZE, if it and
3526 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3531 the low bits of hpa would take us onto the next page */
3532 size = aligned_nrpages(hpa, size);
3533 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3534 hpa >> VTD_PAGE_SHIFT, size, prot);
3527 return ret; 3535 return ret;
3528} 3536}
3529 3537
@@ -3531,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3531 unsigned long iova, size_t size) 3539 unsigned long iova, size_t size)
3532{ 3540{
3533 struct dmar_domain *dmar_domain = domain->priv; 3541 struct dmar_domain *dmar_domain = domain->priv;
3534 dma_addr_t base;
3535 3542
3536 /* The address might not be aligned */ 3543 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3537 base = iova & VTD_PAGE_MASK; 3544 (iova + size - 1) >> VTD_PAGE_SHIFT);
3538 size = VTD_PAGE_ALIGN(size);
3539 dma_pte_clear_range(dmar_domain, base, base + size);
3540 3545
3541 if (dmar_domain->max_addr == base + size) 3546 if (dmar_domain->max_addr == iova + size)
3542 dmar_domain->max_addr = base; 3547 dmar_domain->max_addr = iova;
3543} 3548}
3544 3549
3545static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3550static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3549,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3549 struct dma_pte *pte; 3554 struct dma_pte *pte;
3550 u64 phys = 0; 3555 u64 phys = 0;
3551 3556
3552 pte = addr_to_dma_pte(dmar_domain, iova); 3557 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3553 if (pte) 3558 if (pte)
3554 phys = dma_pte_addr(pte); 3559 phys = dma_pte_addr(pte);
3555 3560