aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci')
-rw-r--r--drivers/pci/intel-iommu.c779
-rw-r--r--drivers/pci/iova.c26
2 files changed, 444 insertions, 361 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 420afa887283..360fb67a30d7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -56,14 +56,32 @@
56#define MAX_AGAW_WIDTH 64 56#define MAX_AGAW_WIDTH 64
57 57
58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
59#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
59 60
60#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
61#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
62#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
63 64
64#ifndef PHYSICAL_PAGE_MASK 65
65#define PHYSICAL_PAGE_MASK PAGE_MASK 66/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
66#endif 67 are never going to work. */
68static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
69{
70 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
71}
72
73static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
74{
75 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
76}
77static inline unsigned long page_to_dma_pfn(struct page *pg)
78{
79 return mm_to_dma_pfn(page_to_pfn(pg));
80}
81static inline unsigned long virt_to_dma_pfn(void *p)
82{
83 return page_to_dma_pfn(virt_to_page(p));
84}
67 85
68/* global iommu list, set NULL for ignored DMAR units */ 86/* global iommu list, set NULL for ignored DMAR units */
69static struct intel_iommu **g_iommus; 87static struct intel_iommu **g_iommus;
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
204 222
205static inline u64 dma_pte_addr(struct dma_pte *pte) 223static inline u64 dma_pte_addr(struct dma_pte *pte)
206{ 224{
207 return (pte->val & VTD_PAGE_MASK); 225#ifdef CONFIG_64BIT
226 return pte->val & VTD_PAGE_MASK;
227#else
228 /* Must have a full atomic 64-bit read */
229 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
230#endif
208} 231}
209 232
210static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 233static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
211{ 234{
212 pte->val |= (addr & VTD_PAGE_MASK); 235 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
213} 236}
214 237
215static inline bool dma_pte_present(struct dma_pte *pte) 238static inline bool dma_pte_present(struct dma_pte *pte)
@@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
217 return (pte->val & 3) != 0; 240 return (pte->val & 3) != 0;
218} 241}
219 242
243static inline int first_pte_in_page(struct dma_pte *pte)
244{
245 return !((unsigned long)pte & ~VTD_PAGE_MASK);
246}
247
220/* 248/*
221 * This domain is a statically identity mapping domain. 249 * This domain is a statically identity mapping domain.
222 * 1. This domain creats a static 1:1 mapping to all usable memory. 250 * 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -244,7 +272,6 @@ struct dmar_domain {
244 struct iova_domain iovad; /* iova's that belong to this domain */ 272 struct iova_domain iovad; /* iova's that belong to this domain */
245 273
246 struct dma_pte *pgd; /* virtual address */ 274 struct dma_pte *pgd; /* virtual address */
247 spinlock_t mapping_lock; /* page table lock */
248 int gaw; /* max guest address width */ 275 int gaw; /* max guest address width */
249 276
250 /* adjusted guest address width, 0 is level 2 30-bit */ 277 /* adjusted guest address width, 0 is level 2 30-bit */
@@ -648,80 +675,78 @@ static inline int width_to_agaw(int width)
648 675
649static inline unsigned int level_to_offset_bits(int level) 676static inline unsigned int level_to_offset_bits(int level)
650{ 677{
651 return (12 + (level - 1) * LEVEL_STRIDE); 678 return (level - 1) * LEVEL_STRIDE;
652} 679}
653 680
654static inline int address_level_offset(u64 addr, int level) 681static inline int pfn_level_offset(unsigned long pfn, int level)
655{ 682{
656 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); 683 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
657} 684}
658 685
659static inline u64 level_mask(int level) 686static inline unsigned long level_mask(int level)
660{ 687{
661 return ((u64)-1 << level_to_offset_bits(level)); 688 return -1UL << level_to_offset_bits(level);
662} 689}
663 690
664static inline u64 level_size(int level) 691static inline unsigned long level_size(int level)
665{ 692{
666 return ((u64)1 << level_to_offset_bits(level)); 693 return 1UL << level_to_offset_bits(level);
667} 694}
668 695
669static inline u64 align_to_level(u64 addr, int level) 696static inline unsigned long align_to_level(unsigned long pfn, int level)
670{ 697{
671 return ((addr + level_size(level) - 1) & level_mask(level)); 698 return (pfn + level_size(level) - 1) & level_mask(level);
672} 699}
673 700
674static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) 701static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
702 unsigned long pfn)
675{ 703{
676 int addr_width = agaw_to_width(domain->agaw); 704 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
677 struct dma_pte *parent, *pte = NULL; 705 struct dma_pte *parent, *pte = NULL;
678 int level = agaw_to_level(domain->agaw); 706 int level = agaw_to_level(domain->agaw);
679 int offset; 707 int offset;
680 unsigned long flags;
681 708
682 BUG_ON(!domain->pgd); 709 BUG_ON(!domain->pgd);
683 710 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
684 addr &= (((u64)1) << addr_width) - 1;
685 parent = domain->pgd; 711 parent = domain->pgd;
686 712
687 spin_lock_irqsave(&domain->mapping_lock, flags);
688 while (level > 0) { 713 while (level > 0) {
689 void *tmp_page; 714 void *tmp_page;
690 715
691 offset = address_level_offset(addr, level); 716 offset = pfn_level_offset(pfn, level);
692 pte = &parent[offset]; 717 pte = &parent[offset];
693 if (level == 1) 718 if (level == 1)
694 break; 719 break;
695 720
696 if (!dma_pte_present(pte)) { 721 if (!dma_pte_present(pte)) {
722 uint64_t pteval;
723
697 tmp_page = alloc_pgtable_page(); 724 tmp_page = alloc_pgtable_page();
698 725
699 if (!tmp_page) { 726 if (!tmp_page)
700 spin_unlock_irqrestore(&domain->mapping_lock,
701 flags);
702 return NULL; 727 return NULL;
728
729 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
730 pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
731 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
732 /* Someone else set it while we were thinking; use theirs. */
733 free_pgtable_page(tmp_page);
734 } else {
735 dma_pte_addr(pte);
736 domain_flush_cache(domain, pte, sizeof(*pte));
703 } 737 }
704 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
705 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
706 /*
707 * high level table always sets r/w, last level page
708 * table control read/write
709 */
710 dma_set_pte_readable(pte);
711 dma_set_pte_writable(pte);
712 domain_flush_cache(domain, pte, sizeof(*pte));
713 } 738 }
714 parent = phys_to_virt(dma_pte_addr(pte)); 739 parent = phys_to_virt(dma_pte_addr(pte));
715 level--; 740 level--;
716 } 741 }
717 742
718 spin_unlock_irqrestore(&domain->mapping_lock, flags);
719 return pte; 743 return pte;
720} 744}
721 745
722/* return address's pte at specific level */ 746/* return address's pte at specific level */
723static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, 747static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
724 int level) 748 unsigned long pfn,
749 int level)
725{ 750{
726 struct dma_pte *parent, *pte = NULL; 751 struct dma_pte *parent, *pte = NULL;
727 int total = agaw_to_level(domain->agaw); 752 int total = agaw_to_level(domain->agaw);
@@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
729 754
730 parent = domain->pgd; 755 parent = domain->pgd;
731 while (level <= total) { 756 while (level <= total) {
732 offset = address_level_offset(addr, total); 757 offset = pfn_level_offset(pfn, total);
733 pte = &parent[offset]; 758 pte = &parent[offset];
734 if (level == total) 759 if (level == total)
735 return pte; 760 return pte;
@@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
742 return NULL; 767 return NULL;
743} 768}
744 769
745/* clear one page's page table */
746static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
747{
748 struct dma_pte *pte = NULL;
749
750 /* get last level pte */
751 pte = dma_addr_level_pte(domain, addr, 1);
752
753 if (pte) {
754 dma_clear_pte(pte);
755 domain_flush_cache(domain, pte, sizeof(*pte));
756 }
757}
758
759/* clear last level pte, a tlb flush should be followed */ 770/* clear last level pte, a tlb flush should be followed */
760static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) 771static void dma_pte_clear_range(struct dmar_domain *domain,
772 unsigned long start_pfn,
773 unsigned long last_pfn)
761{ 774{
762 int addr_width = agaw_to_width(domain->agaw); 775 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
763 int npages; 776 struct dma_pte *first_pte, *pte;
764 777
765 start &= (((u64)1) << addr_width) - 1; 778 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
766 end &= (((u64)1) << addr_width) - 1; 779 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
767 /* in case it's partial page */
768 start &= PAGE_MASK;
769 end = PAGE_ALIGN(end);
770 npages = (end - start) / VTD_PAGE_SIZE;
771 780
772 /* we don't need lock here, nobody else touches the iova range */ 781 /* we don't need lock here; nobody else touches the iova range */
773 while (npages--) { 782 while (start_pfn <= last_pfn) {
774 dma_pte_clear_one(domain, start); 783 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
775 start += VTD_PAGE_SIZE; 784 if (!pte) {
785 start_pfn = align_to_level(start_pfn + 1, 2);
786 continue;
787 }
788 do {
789 dma_clear_pte(pte);
790 start_pfn++;
791 pte++;
792 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
793
794 domain_flush_cache(domain, first_pte,
795 (void *)pte - (void *)first_pte);
776 } 796 }
777} 797}
778 798
779/* free page table pages. last level pte should already be cleared */ 799/* free page table pages. last level pte should already be cleared */
780static void dma_pte_free_pagetable(struct dmar_domain *domain, 800static void dma_pte_free_pagetable(struct dmar_domain *domain,
781 u64 start, u64 end) 801 unsigned long start_pfn,
802 unsigned long last_pfn)
782{ 803{
783 int addr_width = agaw_to_width(domain->agaw); 804 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
784 struct dma_pte *pte; 805 struct dma_pte *first_pte, *pte;
785 int total = agaw_to_level(domain->agaw); 806 int total = agaw_to_level(domain->agaw);
786 int level; 807 int level;
787 u64 tmp; 808 unsigned long tmp;
788 809
789 start &= (((u64)1) << addr_width) - 1; 810 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
790 end &= (((u64)1) << addr_width) - 1; 811 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
791 812
792 /* we don't need lock here, nobody else touches the iova range */ 813 /* We don't need lock here; nobody else touches the iova range */
793 level = 2; 814 level = 2;
794 while (level <= total) { 815 while (level <= total) {
795 tmp = align_to_level(start, level); 816 tmp = align_to_level(start_pfn, level);
796 if (tmp >= end || (tmp + level_size(level) > end)) 817
818 /* If we can't even clear one PTE at this level, we're done */
819 if (tmp + level_size(level) - 1 > last_pfn)
797 return; 820 return;
798 821
799 while (tmp < end) { 822 while (tmp + level_size(level) - 1 <= last_pfn) {
800 pte = dma_addr_level_pte(domain, tmp, level); 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
801 if (pte) { 824 if (!pte) {
802 free_pgtable_page( 825 tmp = align_to_level(tmp + 1, level + 1);
803 phys_to_virt(dma_pte_addr(pte))); 826 continue;
804 dma_clear_pte(pte);
805 domain_flush_cache(domain, pte, sizeof(*pte));
806 } 827 }
807 tmp += level_size(level); 828 do {
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
833 pte++;
834 tmp += level_size(level);
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
808 } 841 }
809 level++; 842 level++;
810 } 843 }
811 /* free pgd */ 844 /* free pgd */
812 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
813 free_pgtable_page(domain->pgd); 846 free_pgtable_page(domain->pgd);
814 domain->pgd = NULL; 847 domain->pgd = NULL;
815 } 848 }
@@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1035} 1068}
1036 1069
1037static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1038 u64 addr, unsigned int pages) 1071 unsigned long pfn, unsigned int pages)
1039{ 1072{
1040 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1041 1075
1042 BUG_ON(addr & (~VTD_PAGE_MASK));
1043 BUG_ON(pages == 0); 1076 BUG_ON(pages == 0);
1044 1077
1045 /* 1078 /*
@@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1054 else 1087 else
1055 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1056 DMA_TLB_PSI_FLUSH); 1089 DMA_TLB_PSI_FLUSH);
1057 if (did) 1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1058 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1059} 1097}
1060 1098
@@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1279 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1280 struct iova *iova; 1318 struct iova *iova;
1281 int i; 1319 int i;
1282 u64 addr, size;
1283 1320
1284 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1285 1322
@@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1302 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1303 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1304 continue; 1341 continue;
1305 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1306 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1307 size = r->end - addr; 1344 IOVA_PFN(r->end));
1308 size = PAGE_ALIGN(size);
1309 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1310 IOVA_PFN(size + addr) - 1);
1311 if (!iova) 1345 if (!iova)
1312 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1313 } 1347 }
@@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1341 unsigned long sagaw; 1375 unsigned long sagaw;
1342 1376
1343 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1344 spin_lock_init(&domain->mapping_lock);
1345 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1346 1379
1347 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
1388{ 1421{
1389 struct dmar_drhd_unit *drhd; 1422 struct dmar_drhd_unit *drhd;
1390 struct intel_iommu *iommu; 1423 struct intel_iommu *iommu;
1391 u64 end;
1392 1424
1393 /* Domain 0 is reserved, so dont process it */ 1425 /* Domain 0 is reserved, so dont process it */
1394 if (!domain) 1426 if (!domain)
@@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
1397 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1398 /* destroy iovas */ 1430 /* destroy iovas */
1399 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1400 end = DOMAIN_MAX_ADDR(domain->gaw);
1401 end = end & (~PAGE_MASK);
1402 1432
1403 /* clear ptes */ 1433 /* clear ptes */
1404 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1405 1435
1406 /* free page tables */ 1436 /* free page tables */
1407 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1408 1438
1409 for_each_active_iommu(iommu, drhd) 1439 for_each_active_iommu(iommu, drhd)
1410 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
1618 tmp->devfn); 1648 tmp->devfn);
1619} 1649}
1620 1650
1621static int 1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1622domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1652 struct scatterlist *sg, unsigned long phys_pfn,
1623 u64 hpa, size_t size, int prot) 1653 unsigned long nr_pages, int prot)
1624{ 1654{
1625 u64 start_pfn, end_pfn; 1655 struct dma_pte *first_pte = NULL, *pte = NULL;
1626 struct dma_pte *pte; 1656 phys_addr_t uninitialized_var(pteval);
1627 int index; 1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1628 int addr_width = agaw_to_width(domain->agaw); 1658 unsigned long sg_res;
1629 1659
1630 hpa &= (((u64)1) << addr_width) - 1; 1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1631 1661
1632 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1662 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1633 return -EINVAL; 1663 return -EINVAL;
1634 iova &= PAGE_MASK; 1664
1635 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1665 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1636 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1666
1637 index = 0; 1667 if (sg)
1638 while (start_pfn < end_pfn) { 1668 sg_res = 0;
1639 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1669 else {
1640 if (!pte) 1670 sg_res = nr_pages + 1;
1641 return -ENOMEM; 1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 }
1673
1674 while (nr_pages--) {
1675 uint64_t tmp;
1676
1677 if (!sg_res) {
1678 sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot;
1682 }
1683 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1685 if (!pte)
1686 return -ENOMEM;
1687 }
1642 /* We don't need lock here, nobody else 1688 /* We don't need lock here, nobody else
1643 * touches the iova range 1689 * touches the iova range
1644 */ 1690 */
1645 BUG_ON(dma_pte_addr(pte)); 1691 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1646 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1692 if (tmp) {
1647 dma_set_pte_prot(pte, prot); 1693 static int dumps = 5;
1648 if (prot & DMA_PTE_SNP) 1694 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1649 dma_set_pte_snp(pte); 1695 iov_pfn, tmp, (unsigned long long)pteval);
1650 domain_flush_cache(domain, pte, sizeof(*pte)); 1696 if (dumps) {
1651 start_pfn++; 1697 dumps--;
1652 index++; 1698 debug_dma_dump_mappings(NULL);
1699 }
1700 WARN_ON(1);
1701 }
1702 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) {
1704 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte);
1706 pte = NULL;
1707 }
1708 iov_pfn++;
1709 pteval += VTD_PAGE_SIZE;
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg);
1653 } 1713 }
1654 return 0; 1714 return 0;
1655} 1715}
1656 1716
1717static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1718 struct scatterlist *sg, unsigned long nr_pages,
1719 int prot)
1720{
1721 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1722}
1723
1724static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 unsigned long phys_pfn, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1729}
1730
1657static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1731static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1658{ 1732{
1659 if (!iommu) 1733 if (!iommu)
@@ -1844,58 +1918,61 @@ error:
1844 1918
1845static int iommu_identity_mapping; 1919static int iommu_identity_mapping;
1846 1920
1921static int iommu_domain_identity_map(struct dmar_domain *domain,
1922 unsigned long long start,
1923 unsigned long long end)
1924{
1925 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1926 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1927
1928 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1929 dma_to_mm_pfn(last_vpfn))) {
1930 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1931 return -ENOMEM;
1932 }
1933
1934 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1935 start, end, domain->id);
1936 /*
1937 * RMRR range might have overlap with physical memory range,
1938 * clear it first
1939 */
1940 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1941
1942 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1943 last_vpfn - first_vpfn + 1,
1944 DMA_PTE_READ|DMA_PTE_WRITE);
1945}
1946
1847static int iommu_prepare_identity_map(struct pci_dev *pdev, 1947static int iommu_prepare_identity_map(struct pci_dev *pdev,
1848 unsigned long long start, 1948 unsigned long long start,
1849 unsigned long long end) 1949 unsigned long long end)
1850{ 1950{
1851 struct dmar_domain *domain; 1951 struct dmar_domain *domain;
1852 unsigned long size;
1853 unsigned long long base;
1854 int ret; 1952 int ret;
1855 1953
1856 printk(KERN_INFO 1954 printk(KERN_INFO
1857 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1955 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1858 pci_name(pdev), start, end); 1956 pci_name(pdev), start, end);
1859 if (iommu_identity_mapping) 1957
1860 domain = si_domain; 1958 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1861 else
1862 /* page table init */
1863 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1864 if (!domain) 1959 if (!domain)
1865 return -ENOMEM; 1960 return -ENOMEM;
1866 1961
1867 /* The address might not be aligned */ 1962 ret = iommu_domain_identity_map(domain, start, end);
1868 base = start & PAGE_MASK;
1869 size = end - base;
1870 size = PAGE_ALIGN(size);
1871 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1872 IOVA_PFN(base + size) - 1)) {
1873 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1874 ret = -ENOMEM;
1875 goto error;
1876 }
1877
1878 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1879 size, base, pci_name(pdev));
1880 /*
1881 * RMRR range might have overlap with physical memory range,
1882 * clear it first
1883 */
1884 dma_pte_clear_range(domain, base, base + size);
1885
1886 ret = domain_page_mapping(domain, base, base, size,
1887 DMA_PTE_READ|DMA_PTE_WRITE);
1888 if (ret) 1963 if (ret)
1889 goto error; 1964 goto error;
1890 1965
1891 /* context entry init */ 1966 /* context entry init */
1892 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 1967 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1893 if (!ret) 1968 if (ret)
1894 return 0; 1969 goto error;
1895error: 1970
1971 return 0;
1972
1973 error:
1896 domain_exit(domain); 1974 domain_exit(domain);
1897 return ret; 1975 return ret;
1898
1899} 1976}
1900 1977
1901static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1978static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1907 rmrr->end_address + 1); 1984 rmrr->end_address + 1);
1908} 1985}
1909 1986
1910struct iommu_prepare_data {
1911 struct pci_dev *pdev;
1912 int ret;
1913};
1914
1915static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1916 unsigned long end_pfn, void *datax)
1917{
1918 struct iommu_prepare_data *data;
1919
1920 data = (struct iommu_prepare_data *)datax;
1921
1922 data->ret = iommu_prepare_identity_map(data->pdev,
1923 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1924 return data->ret;
1925
1926}
1927
1928static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1929{
1930 int nid;
1931 struct iommu_prepare_data data;
1932
1933 data.pdev = pdev;
1934 data.ret = 0;
1935
1936 for_each_online_node(nid) {
1937 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1938 if (data.ret)
1939 return data.ret;
1940 }
1941 return data.ret;
1942}
1943
1944#ifdef CONFIG_DMAR_GFX_WA
1945static void __init iommu_prepare_gfx_mapping(void)
1946{
1947 struct pci_dev *pdev = NULL;
1948 int ret;
1949
1950 for_each_pci_dev(pdev) {
1951 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1952 !IS_GFX_DEVICE(pdev))
1953 continue;
1954 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1955 pci_name(pdev));
1956 ret = iommu_prepare_with_active_regions(pdev);
1957 if (ret)
1958 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1959 }
1960}
1961#else /* !CONFIG_DMAR_GFX_WA */
1962static inline void iommu_prepare_gfx_mapping(void)
1963{
1964 return;
1965}
1966#endif
1967
1968#ifdef CONFIG_DMAR_FLOPPY_WA 1987#ifdef CONFIG_DMAR_FLOPPY_WA
1969static inline void iommu_prepare_isa(void) 1988static inline void iommu_prepare_isa(void)
1970{ 1989{
@@ -1975,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
1975 if (!pdev) 1994 if (!pdev)
1976 return; 1995 return;
1977 1996
1978 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 1997 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1979 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 1998 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1980 1999
1981 if (ret) 2000 if (ret)
1982 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2001 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1983 "floppy might not work\n"); 2002 "floppy might not work\n");
1984 2003
1985} 2004}
1986#else 2005#else
@@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void)
2008} 2027}
2009 2028
2010static int md_domain_init(struct dmar_domain *domain, int guest_width); 2029static int md_domain_init(struct dmar_domain *domain, int guest_width);
2030
2031static int __init si_domain_work_fn(unsigned long start_pfn,
2032 unsigned long end_pfn, void *datax)
2033{
2034 int *ret = datax;
2035
2036 *ret = iommu_domain_identity_map(si_domain,
2037 (uint64_t)start_pfn << PAGE_SHIFT,
2038 (uint64_t)end_pfn << PAGE_SHIFT);
2039 return *ret;
2040
2041}
2042
2011static int si_domain_init(void) 2043static int si_domain_init(void)
2012{ 2044{
2013 struct dmar_drhd_unit *drhd; 2045 struct dmar_drhd_unit *drhd;
2014 struct intel_iommu *iommu; 2046 struct intel_iommu *iommu;
2015 int ret = 0; 2047 int nid, ret = 0;
2016 2048
2017 si_domain = alloc_domain(); 2049 si_domain = alloc_domain();
2018 if (!si_domain) 2050 if (!si_domain)
2019 return -EFAULT; 2051 return -EFAULT;
2020 2052
2053 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2021 2054
2022 for_each_active_iommu(iommu, drhd) { 2055 for_each_active_iommu(iommu, drhd) {
2023 ret = iommu_attach_domain(si_domain, iommu); 2056 ret = iommu_attach_domain(si_domain, iommu);
@@ -2034,6 +2067,12 @@ static int si_domain_init(void)
2034 2067
2035 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2068 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2036 2069
2070 for_each_online_node(nid) {
2071 work_with_active_regions(nid, si_domain_work_fn, &ret);
2072 if (ret)
2073 return ret;
2074 }
2075
2037 return 0; 2076 return 0;
2038} 2077}
2039 2078
@@ -2078,6 +2117,47 @@ static int domain_add_dev_info(struct dmar_domain *domain,
2078 return 0; 2117 return 0;
2079} 2118}
2080 2119
2120static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2121{
2122 if (iommu_identity_mapping == 2)
2123 return IS_GFX_DEVICE(pdev);
2124
2125 /*
2126 * We want to start off with all devices in the 1:1 domain, and
2127 * take them out later if we find they can't access all of memory.
2128 *
2129 * However, we can't do this for PCI devices behind bridges,
2130 * because all PCI devices behind the same bridge will end up
2131 * with the same source-id on their transactions.
2132 *
2133 * Practically speaking, we can't change things around for these
2134 * devices at run-time, because we can't be sure there'll be no
2135 * DMA transactions in flight for any of their siblings.
2136 *
2137 * So PCI devices (unless they're on the root bus) as well as
2138 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2139 * the 1:1 domain, just in _case_ one of their siblings turns out
2140 * not to be able to map all of memory.
2141 */
2142 if (!pdev->is_pcie) {
2143 if (!pci_is_root_bus(pdev->bus))
2144 return 0;
2145 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2146 return 0;
2147 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2148 return 0;
2149
2150 /*
2151 * At boot time, we don't yet know if devices will be 64-bit capable.
2152 * Assume that they will -- if they turn out not to be, then we can
2153 * take them out of the 1:1 domain later.
2154 */
2155 if (!startup)
2156 return pdev->dma_mask > DMA_BIT_MASK(32);
2157
2158 return 1;
2159}
2160
2081static int iommu_prepare_static_identity_mapping(void) 2161static int iommu_prepare_static_identity_mapping(void)
2082{ 2162{
2083 struct pci_dev *pdev = NULL; 2163 struct pci_dev *pdev = NULL;
@@ -2087,16 +2167,19 @@ static int iommu_prepare_static_identity_mapping(void)
2087 if (ret) 2167 if (ret)
2088 return -EFAULT; 2168 return -EFAULT;
2089 2169
2090 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2091 for_each_pci_dev(pdev) { 2170 for_each_pci_dev(pdev) {
2092 ret = iommu_prepare_with_active_regions(pdev); 2171 if (iommu_should_identity_map(pdev, 1)) {
2093 if (ret) { 2172 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2094 printk(KERN_INFO "1:1 mapping to one domain failed.\n"); 2173 pci_name(pdev));
2095 return -EFAULT; 2174
2175 ret = domain_context_mapping(si_domain, pdev,
2176 CONTEXT_TT_MULTI_LEVEL);
2177 if (ret)
2178 return ret;
2179 ret = domain_add_dev_info(si_domain, pdev);
2180 if (ret)
2181 return ret;
2096 } 2182 }
2097 ret = domain_add_dev_info(si_domain, pdev);
2098 if (ret)
2099 return ret;
2100 } 2183 }
2101 2184
2102 return 0; 2185 return 0;
@@ -2251,6 +2334,10 @@ int __init init_dmars(void)
2251 * identity mapping if iommu_identity_mapping is set. 2334 * identity mapping if iommu_identity_mapping is set.
2252 */ 2335 */
2253 if (!iommu_pass_through) { 2336 if (!iommu_pass_through) {
2337#ifdef CONFIG_DMAR_BROKEN_GFX_WA
2338 if (!iommu_identity_mapping)
2339 iommu_identity_mapping = 2;
2340#endif
2254 if (iommu_identity_mapping) 2341 if (iommu_identity_mapping)
2255 iommu_prepare_static_identity_mapping(); 2342 iommu_prepare_static_identity_mapping();
2256 /* 2343 /*
@@ -2284,8 +2371,6 @@ int __init init_dmars(void)
2284 } 2371 }
2285 } 2372 }
2286 2373
2287 iommu_prepare_gfx_mapping();
2288
2289 iommu_prepare_isa(); 2374 iommu_prepare_isa();
2290 } 2375 }
2291 2376
@@ -2330,50 +2415,40 @@ error:
2330 return ret; 2415 return ret;
2331} 2416}
2332 2417
2333static inline u64 aligned_size(u64 host_addr, size_t size) 2418/* Returns a number of VTD pages, but aligned to MM page size */
2334{ 2419static inline unsigned long aligned_nrpages(unsigned long host_addr,
2335 u64 addr; 2420 size_t size)
2336 addr = (host_addr & (~PAGE_MASK)) + size;
2337 return PAGE_ALIGN(addr);
2338}
2339
2340struct iova *
2341iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2342{ 2421{
2343 struct iova *piova; 2422 host_addr &= ~PAGE_MASK;
2344 2423 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2345 /* Make sure it's in range */
2346 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2347 if (!size || (IOVA_START_ADDR + size > end))
2348 return NULL;
2349
2350 piova = alloc_iova(&domain->iovad,
2351 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2352 return piova;
2353} 2424}
2354 2425
2355static struct iova * 2426/* This takes a number of _MM_ pages, not VTD pages */
2356__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, 2427static struct iova *intel_alloc_iova(struct device *dev,
2357 size_t size, u64 dma_mask) 2428 struct dmar_domain *domain,
2429 unsigned long nrpages, uint64_t dma_mask)
2358{ 2430{
2359 struct pci_dev *pdev = to_pci_dev(dev); 2431 struct pci_dev *pdev = to_pci_dev(dev);
2360 struct iova *iova = NULL; 2432 struct iova *iova = NULL;
2361 2433
2362 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2434 /* Restrict dma_mask to the width that the iommu can handle */
2363 iova = iommu_alloc_iova(domain, size, dma_mask); 2435 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2364 else { 2436
2437 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2365 /* 2438 /*
2366 * First try to allocate an io virtual address in 2439 * First try to allocate an io virtual address in
2367 * DMA_BIT_MASK(32) and if that fails then try allocating 2440 * DMA_BIT_MASK(32) and if that fails then try allocating
2368 * from higher range 2441 * from higher range
2369 */ 2442 */
2370 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2443 iova = alloc_iova(&domain->iovad, nrpages,
2371 if (!iova) 2444 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2372 iova = iommu_alloc_iova(domain, size, dma_mask); 2445 if (iova)
2373 } 2446 return iova;
2374 2447 }
2375 if (!iova) { 2448 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2376 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2449 if (unlikely(!iova)) {
2450 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2451 nrpages, pci_name(pdev));
2377 return NULL; 2452 return NULL;
2378 } 2453 }
2379 2454
@@ -2415,16 +2490,24 @@ static int iommu_dummy(struct pci_dev *pdev)
2415} 2490}
2416 2491
2417/* Check if the pdev needs to go through non-identity map and unmap process.*/ 2492/* Check if the pdev needs to go through non-identity map and unmap process.*/
2418static int iommu_no_mapping(struct pci_dev *pdev) 2493static int iommu_no_mapping(struct device *dev)
2419{ 2494{
2495 struct pci_dev *pdev;
2420 int found; 2496 int found;
2421 2497
2498 if (unlikely(dev->bus != &pci_bus_type))
2499 return 1;
2500
2501 pdev = to_pci_dev(dev);
2502 if (iommu_dummy(pdev))
2503 return 1;
2504
2422 if (!iommu_identity_mapping) 2505 if (!iommu_identity_mapping)
2423 return iommu_dummy(pdev); 2506 return 0;
2424 2507
2425 found = identity_mapping(pdev); 2508 found = identity_mapping(pdev);
2426 if (found) { 2509 if (found) {
2427 if (pdev->dma_mask > DMA_BIT_MASK(32)) 2510 if (iommu_should_identity_map(pdev, 0))
2428 return 1; 2511 return 1;
2429 else { 2512 else {
2430 /* 2513 /*
@@ -2441,9 +2524,12 @@ static int iommu_no_mapping(struct pci_dev *pdev)
2441 * In case of a detached 64 bit DMA device from vm, the device 2524 * In case of a detached 64 bit DMA device from vm, the device
2442 * is put into si_domain for identity mapping. 2525 * is put into si_domain for identity mapping.
2443 */ 2526 */
2444 if (pdev->dma_mask > DMA_BIT_MASK(32)) { 2527 if (iommu_should_identity_map(pdev, 0)) {
2445 int ret; 2528 int ret;
2446 ret = domain_add_dev_info(si_domain, pdev); 2529 ret = domain_add_dev_info(si_domain, pdev);
2530 if (ret)
2531 return 0;
2532 ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2447 if (!ret) { 2533 if (!ret) {
2448 printk(KERN_INFO "64bit %s uses identity mapping\n", 2534 printk(KERN_INFO "64bit %s uses identity mapping\n",
2449 pci_name(pdev)); 2535 pci_name(pdev));
@@ -2452,7 +2538,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
2452 } 2538 }
2453 } 2539 }
2454 2540
2455 return iommu_dummy(pdev); 2541 return 0;
2456} 2542}
2457 2543
2458static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2544static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
@@ -2468,7 +2554,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2468 2554
2469 BUG_ON(dir == DMA_NONE); 2555 BUG_ON(dir == DMA_NONE);
2470 2556
2471 if (iommu_no_mapping(pdev)) 2557 if (iommu_no_mapping(hwdev))
2472 return paddr; 2558 return paddr;
2473 2559
2474 domain = get_valid_domain_for_dev(pdev); 2560 domain = get_valid_domain_for_dev(pdev);
@@ -2476,14 +2562,13 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2476 return 0; 2562 return 0;
2477 2563
2478 iommu = domain_get_iommu(domain); 2564 iommu = domain_get_iommu(domain);
2479 size = aligned_size((u64)paddr, size); 2565 size = aligned_nrpages(paddr, size);
2480 2566
2481 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2567 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2568 pdev->dma_mask);
2482 if (!iova) 2569 if (!iova)
2483 goto error; 2570 goto error;
2484 2571
2485 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2486
2487 /* 2572 /*
2488 * Check if DMAR supports zero-length reads on write only 2573 * Check if DMAR supports zero-length reads on write only
2489 * mappings.. 2574 * mappings..
@@ -2499,20 +2584,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2499 * might have two guest_addr mapping to the same host paddr, but this 2584 * might have two guest_addr mapping to the same host paddr, but this
2500 * is not a big problem 2585 * is not a big problem
2501 */ 2586 */
2502 ret = domain_page_mapping(domain, start_paddr, 2587 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2503 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2588 paddr >> VTD_PAGE_SHIFT, size, prot);
2504 size, prot);
2505 if (ret) 2589 if (ret)
2506 goto error; 2590 goto error;
2507 2591
2508 /* it's a non-present to present mapping. Only flush if caching mode */ 2592 /* it's a non-present to present mapping. Only flush if caching mode */
2509 if (cap_caching_mode(iommu->cap)) 2593 if (cap_caching_mode(iommu->cap))
2510 iommu_flush_iotlb_psi(iommu, 0, start_paddr, 2594 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2511 size >> VTD_PAGE_SHIFT);
2512 else 2595 else
2513 iommu_flush_write_buffer(iommu); 2596 iommu_flush_write_buffer(iommu);
2514 2597
2515 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2598 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2599 start_paddr += paddr & ~PAGE_MASK;
2600 return start_paddr;
2516 2601
2517error: 2602error:
2518 if (iova) 2603 if (iova)
@@ -2605,11 +2690,11 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2605{ 2690{
2606 struct pci_dev *pdev = to_pci_dev(dev); 2691 struct pci_dev *pdev = to_pci_dev(dev);
2607 struct dmar_domain *domain; 2692 struct dmar_domain *domain;
2608 unsigned long start_addr; 2693 unsigned long start_pfn, last_pfn;
2609 struct iova *iova; 2694 struct iova *iova;
2610 struct intel_iommu *iommu; 2695 struct intel_iommu *iommu;
2611 2696
2612 if (iommu_no_mapping(pdev)) 2697 if (iommu_no_mapping(dev))
2613 return; 2698 return;
2614 2699
2615 domain = find_domain(pdev); 2700 domain = find_domain(pdev);
@@ -2618,22 +2703,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2618 iommu = domain_get_iommu(domain); 2703 iommu = domain_get_iommu(domain);
2619 2704
2620 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2705 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2621 if (!iova) 2706 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2707 (unsigned long long)dev_addr))
2622 return; 2708 return;
2623 2709
2624 start_addr = iova->pfn_lo << PAGE_SHIFT; 2710 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2625 size = aligned_size((u64)dev_addr, size); 2711 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2626 2712
2627 pr_debug("Device %s unmapping: %zx@%llx\n", 2713 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2628 pci_name(pdev), size, (unsigned long long)start_addr); 2714 pci_name(pdev), start_pfn, last_pfn);
2629 2715
2630 /* clear the whole page */ 2716 /* clear the whole page */
2631 dma_pte_clear_range(domain, start_addr, start_addr + size); 2717 dma_pte_clear_range(domain, start_pfn, last_pfn);
2718
2632 /* free page tables */ 2719 /* free page tables */
2633 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2720 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2721
2634 if (intel_iommu_strict) { 2722 if (intel_iommu_strict) {
2635 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2723 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2636 size >> VTD_PAGE_SHIFT); 2724 last_pfn - start_pfn + 1);
2637 /* free iova */ 2725 /* free iova */
2638 __free_iova(&domain->iovad, iova); 2726 __free_iova(&domain->iovad, iova);
2639 } else { 2727 } else {
@@ -2691,17 +2779,13 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2691 int nelems, enum dma_data_direction dir, 2779 int nelems, enum dma_data_direction dir,
2692 struct dma_attrs *attrs) 2780 struct dma_attrs *attrs)
2693{ 2781{
2694 int i;
2695 struct pci_dev *pdev = to_pci_dev(hwdev); 2782 struct pci_dev *pdev = to_pci_dev(hwdev);
2696 struct dmar_domain *domain; 2783 struct dmar_domain *domain;
2697 unsigned long start_addr; 2784 unsigned long start_pfn, last_pfn;
2698 struct iova *iova; 2785 struct iova *iova;
2699 size_t size = 0;
2700 phys_addr_t addr;
2701 struct scatterlist *sg;
2702 struct intel_iommu *iommu; 2786 struct intel_iommu *iommu;
2703 2787
2704 if (iommu_no_mapping(pdev)) 2788 if (iommu_no_mapping(hwdev))
2705 return; 2789 return;
2706 2790
2707 domain = find_domain(pdev); 2791 domain = find_domain(pdev);
@@ -2710,22 +2794,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2710 iommu = domain_get_iommu(domain); 2794 iommu = domain_get_iommu(domain);
2711 2795
2712 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2796 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2713 if (!iova) 2797 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2798 (unsigned long long)sglist[0].dma_address))
2714 return; 2799 return;
2715 for_each_sg(sglist, sg, nelems, i) {
2716 addr = page_to_phys(sg_page(sg)) + sg->offset;
2717 size += aligned_size((u64)addr, sg->length);
2718 }
2719 2800
2720 start_addr = iova->pfn_lo << PAGE_SHIFT; 2801 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2802 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2721 2803
2722 /* clear the whole page */ 2804 /* clear the whole page */
2723 dma_pte_clear_range(domain, start_addr, start_addr + size); 2805 dma_pte_clear_range(domain, start_pfn, last_pfn);
2806
2724 /* free page tables */ 2807 /* free page tables */
2725 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2808 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2726 2809
2727 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2810 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2728 size >> VTD_PAGE_SHIFT); 2811 (last_pfn - start_pfn + 1));
2729 2812
2730 /* free iova */ 2813 /* free iova */
2731 __free_iova(&domain->iovad, iova); 2814 __free_iova(&domain->iovad, iova);
@@ -2748,21 +2831,20 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2748static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2831static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2749 enum dma_data_direction dir, struct dma_attrs *attrs) 2832 enum dma_data_direction dir, struct dma_attrs *attrs)
2750{ 2833{
2751 phys_addr_t addr;
2752 int i; 2834 int i;
2753 struct pci_dev *pdev = to_pci_dev(hwdev); 2835 struct pci_dev *pdev = to_pci_dev(hwdev);
2754 struct dmar_domain *domain; 2836 struct dmar_domain *domain;
2755 size_t size = 0; 2837 size_t size = 0;
2756 int prot = 0; 2838 int prot = 0;
2757 size_t offset = 0; 2839 size_t offset_pfn = 0;
2758 struct iova *iova = NULL; 2840 struct iova *iova = NULL;
2759 int ret; 2841 int ret;
2760 struct scatterlist *sg; 2842 struct scatterlist *sg;
2761 unsigned long start_addr; 2843 unsigned long start_vpfn;
2762 struct intel_iommu *iommu; 2844 struct intel_iommu *iommu;
2763 2845
2764 BUG_ON(dir == DMA_NONE); 2846 BUG_ON(dir == DMA_NONE);
2765 if (iommu_no_mapping(pdev)) 2847 if (iommu_no_mapping(hwdev))
2766 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2848 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2767 2849
2768 domain = get_valid_domain_for_dev(pdev); 2850 domain = get_valid_domain_for_dev(pdev);
@@ -2771,12 +2853,11 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2771 2853
2772 iommu = domain_get_iommu(domain); 2854 iommu = domain_get_iommu(domain);
2773 2855
2774 for_each_sg(sglist, sg, nelems, i) { 2856 for_each_sg(sglist, sg, nelems, i)
2775 addr = page_to_phys(sg_page(sg)) + sg->offset; 2857 size += aligned_nrpages(sg->offset, sg->length);
2776 size += aligned_size((u64)addr, sg->length);
2777 }
2778 2858
2779 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2859 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2860 pdev->dma_mask);
2780 if (!iova) { 2861 if (!iova) {
2781 sglist->dma_length = 0; 2862 sglist->dma_length = 0;
2782 return 0; 2863 return 0;
@@ -2792,35 +2873,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2792 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2873 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2793 prot |= DMA_PTE_WRITE; 2874 prot |= DMA_PTE_WRITE;
2794 2875
2795 start_addr = iova->pfn_lo << PAGE_SHIFT; 2876 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2796 offset = 0; 2877
2797 for_each_sg(sglist, sg, nelems, i) { 2878 ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
2798 addr = page_to_phys(sg_page(sg)) + sg->offset; 2879 if (unlikely(ret)) {
2799 size = aligned_size((u64)addr, sg->length); 2880 /* clear the page */
2800 ret = domain_page_mapping(domain, start_addr + offset, 2881 dma_pte_clear_range(domain, start_vpfn,
2801 ((u64)addr) & PHYSICAL_PAGE_MASK, 2882 start_vpfn + size - 1);
2802 size, prot); 2883 /* free page tables */
2803 if (ret) { 2884 dma_pte_free_pagetable(domain, start_vpfn,
2804 /* clear the page */ 2885 start_vpfn + size - 1);
2805 dma_pte_clear_range(domain, start_addr, 2886 /* free iova */
2806 start_addr + offset); 2887 __free_iova(&domain->iovad, iova);
2807 /* free page tables */ 2888 return 0;
2808 dma_pte_free_pagetable(domain, start_addr,
2809 start_addr + offset);
2810 /* free iova */
2811 __free_iova(&domain->iovad, iova);
2812 return 0;
2813 }
2814 sg->dma_address = start_addr + offset +
2815 ((u64)addr & (~PAGE_MASK));
2816 sg->dma_length = sg->length;
2817 offset += size;
2818 } 2889 }
2819 2890
2820 /* it's a non-present to present mapping. Only flush if caching mode */ 2891 /* it's a non-present to present mapping. Only flush if caching mode */
2821 if (cap_caching_mode(iommu->cap)) 2892 if (cap_caching_mode(iommu->cap))
2822 iommu_flush_iotlb_psi(iommu, 0, start_addr, 2893 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2823 offset >> VTD_PAGE_SHIFT);
2824 else 2894 else
2825 iommu_flush_write_buffer(iommu); 2895 iommu_flush_write_buffer(iommu);
2826 2896
@@ -3325,7 +3395,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3325 int adjust_width; 3395 int adjust_width;
3326 3396
3327 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3397 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3328 spin_lock_init(&domain->mapping_lock);
3329 spin_lock_init(&domain->iommu_lock); 3398 spin_lock_init(&domain->iommu_lock);
3330 3399
3331 domain_reserve_special_ranges(domain); 3400 domain_reserve_special_ranges(domain);
@@ -3379,8 +3448,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3379 3448
3380static void vm_domain_exit(struct dmar_domain *domain) 3449static void vm_domain_exit(struct dmar_domain *domain)
3381{ 3450{
3382 u64 end;
3383
3384 /* Domain 0 is reserved, so dont process it */ 3451 /* Domain 0 is reserved, so dont process it */
3385 if (!domain) 3452 if (!domain)
3386 return; 3453 return;
@@ -3388,14 +3455,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3388 vm_domain_remove_all_dev_info(domain); 3455 vm_domain_remove_all_dev_info(domain);
3389 /* destroy iovas */ 3456 /* destroy iovas */
3390 put_iova_domain(&domain->iovad); 3457 put_iova_domain(&domain->iovad);
3391 end = DOMAIN_MAX_ADDR(domain->gaw);
3392 end = end & (~VTD_PAGE_MASK);
3393 3458
3394 /* clear ptes */ 3459 /* clear ptes */
3395 dma_pte_clear_range(domain, 0, end); 3460 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3396 3461
3397 /* free page tables */ 3462 /* free page tables */
3398 dma_pte_free_pagetable(domain, 0, end); 3463 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3399 3464
3400 iommu_free_vm_domain(domain); 3465 iommu_free_vm_domain(domain);
3401 free_domain_mem(domain); 3466 free_domain_mem(domain);
@@ -3504,7 +3569,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3504 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3569 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3505 prot |= DMA_PTE_SNP; 3570 prot |= DMA_PTE_SNP;
3506 3571
3507 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3572 max_addr = iova + size;
3508 if (dmar_domain->max_addr < max_addr) { 3573 if (dmar_domain->max_addr < max_addr) {
3509 int min_agaw; 3574 int min_agaw;
3510 u64 end; 3575 u64 end;
@@ -3522,8 +3587,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3522 } 3587 }
3523 dmar_domain->max_addr = max_addr; 3588 dmar_domain->max_addr = max_addr;
3524 } 3589 }
3525 3590 /* Round up size to next multiple of PAGE_SIZE, if it and
3526 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3591 the low bits of hpa would take us onto the next page */
3592 size = aligned_nrpages(hpa, size);
3593 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3594 hpa >> VTD_PAGE_SHIFT, size, prot);
3527 return ret; 3595 return ret;
3528} 3596}
3529 3597
@@ -3531,15 +3599,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3531 unsigned long iova, size_t size) 3599 unsigned long iova, size_t size)
3532{ 3600{
3533 struct dmar_domain *dmar_domain = domain->priv; 3601 struct dmar_domain *dmar_domain = domain->priv;
3534 dma_addr_t base;
3535 3602
3536 /* The address might not be aligned */ 3603 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3537 base = iova & VTD_PAGE_MASK; 3604 (iova + size - 1) >> VTD_PAGE_SHIFT);
3538 size = VTD_PAGE_ALIGN(size);
3539 dma_pte_clear_range(dmar_domain, base, base + size);
3540 3605
3541 if (dmar_domain->max_addr == base + size) 3606 if (dmar_domain->max_addr == iova + size)
3542 dmar_domain->max_addr = base; 3607 dmar_domain->max_addr = iova;
3543} 3608}
3544 3609
3545static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3610static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3549,7 +3614,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3549 struct dma_pte *pte; 3614 struct dma_pte *pte;
3550 u64 phys = 0; 3615 u64 phys = 0;
3551 3616
3552 pte = addr_to_dma_pte(dmar_domain, iova); 3617 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3553 if (pte) 3618 if (pte)
3554 phys = dma_pte_addr(pte); 3619 phys = dma_pte_addr(pte);
3555 3620
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 2287116e9822..46dd440e2315 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -1,9 +1,19 @@
1/* 1/*
2 * Copyright (c) 2006, Intel Corporation. 2 * Copyright © 2006-2009, Intel Corporation.
3 * 3 *
4 * This file is released under the GPLv2. 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
5 * 16 *
6 * Copyright (C) 2006-2008 Intel Corporation
7 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 17 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8 */ 18 */
9 19
@@ -123,7 +133,15 @@ move_left:
123 /* Insert the new_iova into domain rbtree by holding writer lock */ 133 /* Insert the new_iova into domain rbtree by holding writer lock */
124 /* Add new node and rebalance tree. */ 134 /* Add new node and rebalance tree. */
125 { 135 {
126 struct rb_node **entry = &((prev)), *parent = NULL; 136 struct rb_node **entry, *parent = NULL;
137
138 /* If we have 'prev', it's a valid place to start the
139 insertion. Otherwise, start from the root. */
140 if (prev)
141 entry = &prev;
142 else
143 entry = &iovad->rbroot.rb_node;
144
127 /* Figure out where to put new node */ 145 /* Figure out where to put new node */
128 while (*entry) { 146 while (*entry) {
129 struct iova *this = container_of(*entry, 147 struct iova *this = container_of(*entry,